From e610420ca5c922455fa7952bbaaa6f48f33714bc Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 30 Apr 2025 17:18:19 +0100 Subject: [PATCH 01/37] First steps. --- Cargo.lock | 2 + crates/opte-api/Cargo.toml | 1 + crates/opte-api/src/lib.rs | 4 +- crates/opte-api/src/stat.rs | 37 ++++ lib/opte/Cargo.toml | 1 + lib/opte/src/engine/mod.rs | 1 + lib/opte/src/engine/port/mod.rs | 5 + lib/opte/src/engine/stat.rs | 328 ++++++++++++++++++++++++++++++++ 8 files changed, 378 insertions(+), 1 deletion(-) create mode 100644 crates/opte-api/src/stat.rs create mode 100644 lib/opte/src/engine/stat.rs diff --git a/Cargo.lock b/Cargo.lock index 9bb90570..c113b8fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1237,6 +1237,7 @@ dependencies = [ "smoltcp", "tabwriter", "usdt", + "uuid", "version_check", "zerocopy 0.8.25", ] @@ -1251,6 +1252,7 @@ dependencies = [ "postcard", "serde", "smoltcp", + "uuid", ] [[package]] diff --git a/crates/opte-api/Cargo.toml b/crates/opte-api/Cargo.toml index 7c4d2e60..34c47a95 100644 --- a/crates/opte-api/Cargo.toml +++ b/crates/opte-api/Cargo.toml @@ -17,6 +17,7 @@ ingot.workspace = true ipnetwork = { workspace = true, optional = true } postcard.workspace = true serde.workspace = true +uuid.workspace = true [dependencies.smoltcp] workspace = true diff --git a/crates/opte-api/src/lib.rs b/crates/opte-api/src/lib.rs index 2a2ed860..bea2b841 100644 --- a/crates/opte-api/src/lib.rs +++ b/crates/opte-api/src/lib.rs @@ -28,6 +28,7 @@ pub mod encap; pub mod ip; pub mod mac; pub mod ndp; +pub mod stat; pub mod tcp; pub mod ulp; @@ -38,6 +39,7 @@ pub use encap::*; pub use ip::*; pub use mac::*; pub use ndp::*; +pub use stat::*; pub use tcp::*; pub use ulp::*; @@ -51,7 +53,7 @@ pub use ulp::*; /// /// We rely on CI and the check-api-version.sh script to verify that /// this number is incremented anytime the oxide-api code changes. -pub const API_VERSION: u64 = 36; +pub const API_VERSION: u64 = 37; /// Major version of the OPTE package. 
pub const MAJOR_VERSION: u64 = 0; diff --git a/crates/opte-api/src/stat.rs b/crates/opte-api/src/stat.rs new file mode 100644 index 00000000..0f847802 --- /dev/null +++ b/crates/opte-api/src/stat.rs @@ -0,0 +1,37 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2025 Oxide Computer Company + +//! Types for handling flow stats from the ioctl API. + +use crate::Direction; +use alloc::vec::Vec; +use serde::Deserialize; +use serde::Serialize; +use uuid::Uuid; + +#[derive(Deserialize, Serialize)] +pub struct FlowStat { + pub partner: FlowId, + pub dir: Direction, + pub bases: Vec, + pub stats: PacketCounter, +} + +#[derive(Deserialize, Serialize)] +pub struct PacketCounter { + pub pkts_in: u64, + pub bytes_in: u64, + pub pkts_out: u64, + pub bytes_out: u64, +} + +#[derive(Deserialize, Serialize)] +pub struct FullCounter { + pub allow: u64, + pub deny: u64, + pub hairpin: u64, + pub packets: PacketCounter, +} diff --git a/lib/opte/Cargo.toml b/lib/opte/Cargo.toml index 90f0ab5f..3a7ca01a 100644 --- a/lib/opte/Cargo.toml +++ b/lib/opte/Cargo.toml @@ -47,6 +47,7 @@ itertools = { workspace = true, optional = true } postcard.workspace = true serde.workspace = true tabwriter = { workspace = true, optional = true } +uuid.workspace = true usdt = { workspace = true, optional = true } zerocopy = { workspace = true, optional = true } diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index 603f51b3..08793d3e 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -28,6 +28,7 @@ pub mod port; pub mod predicate; pub mod rule; pub mod snat; +pub mod stat; #[macro_use] pub mod tcp; pub mod tcp_state; diff --git a/lib/opte/src/engine/port/mod.rs b/lib/opte/src/engine/port/mod.rs index aaec912c..7b5015a6 100644 --- a/lib/opte/src/engine/port/mod.rs +++ 
b/lib/opte/src/engine/port/mod.rs @@ -44,6 +44,7 @@ use super::rule::HdrTransform; use super::rule::HdrTransformError; use super::rule::Rule; use super::rule::TransformFlags; +use super::stat::StatTree; use super::tcp::KEEPALIVE_EXPIRE_TTL; use super::tcp::TIME_WAIT_EXPIRE_TTL; use super::tcp_state::TcpFlowState; @@ -360,6 +361,8 @@ impl PortBuilder { stats: KStatNamed::new("xde", &self.name, PortStats::new())?, net, data, + + flow_stats: Default::default(), }) } @@ -755,6 +758,8 @@ pub struct Port { stats: KStatNamed, net: N, data: KRwLock, + + flow_stats: StatTree, } // Convert: diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs new file mode 100644 index 00000000..2afa2595 --- /dev/null +++ b/lib/opte/src/engine/stat.rs @@ -0,0 +1,328 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2025 Oxide Computer Company + +//! Flow stat objects modified and tracked as rules and entries are used. + +use crate::api::InnerFlowId; +use crate::ddi::sync::KRwLock; +use crate::ddi::sync::KRwLockType; +use crate::ddi::time::Moment; +use alloc::collections::BTreeMap; +use alloc::collections::BTreeSet; +use alloc::collections::btree_map::Entry; +use alloc::sync::Arc; +use alloc::sync::Weak; +use alloc::vec::Vec; +use core::sync::atomic::AtomicU64; +use core::sync::atomic::Ordering; +use opte_api::Direction; +use opte_api::FlowStat as ApiFlowStat; +use opte_api::FullCounter as ApiFullCounter; +use opte_api::PacketCounter as ApiPktCounter; +use opte_api::TcpState; +use uuid::Uuid; + +/// Opaque identifier for tracking unique stat objects. +#[derive(Copy, Clone, Hash, PartialEq, PartialOrd, Eq, Ord)] +pub struct StatId(u64); + +pub struct FlowStat { + /// The direction of this flow half. + pub dir: Direction, + /// The other half of this flow. 
+ pub partner: InnerFlowId, + /// `TableStat`s to whom we must return our own `stats`. + pub parents: Vec>, + /// The cached list of IDs of root `TableStat` entries. + pub bases: BTreeSet, + + /// Actual stats associated with this flow. + pub shared: Arc, + + /// When was this flow last updated? + pub last_hit: AtomicU64, +} + +pub struct SharedFlowStat { + /// Actual stats associated with this flow. + pub stats: PacketCounter, + + /// Tcp? + /// + /// Yeah this needs some rework wrt today... + pub tcp: Option, + + /// Which direction did the packet arrive from? + pub first_dir: Direction, +} + +impl From<&FlowStat> for ApiFlowStat { + fn from(value: &FlowStat) -> Self { + ApiFlowStat { + partner: value.partner, + dir: value.dir, + bases: value.bases.iter().copied().collect(), + stats: (&value.shared.stats).into(), + } + } +} + +pub struct TableStat { + pub id: Option, + + pub parents: Vec>, + pub children: KRwLock>>, + + /// The actual stats! + pub stats: Arc, + + /// When was this flow last updated? + pub last_hit: AtomicU64, +} + +pub struct PacketCounter { + pub id: StatId, + + pub pkts_in: AtomicU64, + pub bytes_in: AtomicU64, + pub pkts_out: AtomicU64, + pub bytes_out: AtomicU64, +} + +impl From<&PacketCounter> for ApiPktCounter { + fn from(val: &PacketCounter) -> Self { + ApiPktCounter { + pkts_in: val.pkts_in.load(Ordering::Relaxed), + bytes_in: val.bytes_in.load(Ordering::Relaxed), + pkts_out: val.pkts_out.load(Ordering::Relaxed), + bytes_out: val.bytes_out.load(Ordering::Relaxed), + } + } +} + +pub struct FullCounter { + pub allow: AtomicU64, + pub deny: AtomicU64, + pub hairpin: AtomicU64, + pub packets: PacketCounter, +} + +impl From<&FullCounter> for ApiFullCounter { + fn from(val: &FullCounter) -> Self { + ApiFullCounter { + packets: (&val.packets).into(), + allow: val.allow.load(Ordering::Relaxed), + deny: val.deny.load(Ordering::Relaxed), + hairpin: val.hairpin.load(Ordering::Relaxed), + } + } +} + +pub trait FoldStat: Send + Sync { + fn fold(&self, into: &FullCounter, 
visited: &mut BTreeSet); +} + +impl FoldStat for FlowStat { + fn fold(&self, into: &FullCounter, visited: &mut BTreeSet) { + if !visited.insert(self.shared.stats.id) { + self.shared.stats.combine(&into.packets); + } + } +} + +impl FoldStat for TableStat { + fn fold(&self, into: &FullCounter, visited: &mut BTreeSet) { + if !visited.insert(self.stats.id()) { + self.stats.combine(into); + } + } +} + +impl PacketCounter { + fn combine(&self, into: &Self) { + into.pkts_in + .fetch_add(self.pkts_in.load(Ordering::Relaxed), Ordering::Relaxed); + into.bytes_in.fetch_add( + self.bytes_in.load(Ordering::Relaxed), + Ordering::Relaxed, + ); + into.pkts_out.fetch_add( + self.pkts_out.load(Ordering::Relaxed), + Ordering::Relaxed, + ); + into.bytes_out.fetch_add( + self.bytes_out.load(Ordering::Relaxed), + Ordering::Relaxed, + ); + } +} + +impl FullCounter { + fn combine(&self, into: &Self) { + into.packets.combine(&self.packets); + into.allow + .fetch_add(self.allow.load(Ordering::Relaxed), Ordering::Relaxed); + into.deny + .fetch_add(self.deny.load(Ordering::Relaxed), Ordering::Relaxed); + into.hairpin + .fetch_add(self.hairpin.load(Ordering::Relaxed), Ordering::Relaxed); + } + + #[inline] + fn id(&self) -> StatId { + self.packets.id + } +} + +/// Tracking/handling of all stats. +/// +/// ?? Describe? +#[derive(Default)] +pub struct StatTree { + next_id: u64, + roots: BTreeMap>, + intermediate: Vec>, + flows: BTreeMap>, +} + +impl StatTree { + fn get_id(&mut self) -> StatId { + let out = self.next_id; + self.next_id += 1; + StatId(out) + } + + fn pkt_counter(&mut self) -> PacketCounter { + PacketCounter { + id: self.get_id(), + pkts_in: 0.into(), + bytes_in: 0.into(), + pkts_out: 0.into(), + bytes_out: 0.into(), + } + } + + fn full_counter(&mut self) -> FullCounter { + FullCounter { + allow: 0.into(), + deny: 0.into(), + hairpin: 0.into(), + packets: self.pkt_counter(), + } + } + + pub fn new_root(&mut self) -> Arc { + // TODO: RNG in illumos kernel? 
+ let uuid = Uuid::from_u64_pair(0, self.next_id); + self.new_root_with_id(uuid) + } + + pub fn new_root_with_id(&mut self, uuid: Uuid) -> Arc { + let mut children = KRwLock::new(vec![]); + children.init(KRwLockType::Driver); + + let out = Arc::new(TableStat { + id: Some(uuid), + parents: vec![], + children, + stats: self.full_counter().into(), + last_hit: Moment::now().raw().into(), + }); + + // TODO: what if already exists?! + let _ = self.roots.insert(uuid, out.clone()); + + out + } + + pub fn new_intermediate( + &mut self, + parents: Vec>, + ) -> Arc { + let mut children = KRwLock::new(vec![]); + children.init(KRwLockType::Driver); + + let out = Arc::new(TableStat { + id: None, + parents, + children, + stats: self.full_counter().into(), + last_hit: Moment::now().raw().into(), + }); + + for parent in &out.parents { + let mut p_children = parent.children.write(); + let weak = Arc::downgrade(&out); + p_children.push(weak); + } + + self.intermediate.push(out.clone()); + + out + } + + pub fn new_flow( + &mut self, + flow_id: &InnerFlowId, + partner_flow: &InnerFlowId, + dir: Direction, + parents: Vec>, + ) -> Arc { + if let Entry::Occupied(e) = self.flows.entry(*flow_id) { + // TODO: what to do with (maybe new) parents & bases?! + return e.get().clone(); + } + + let bases = get_base_ids(&parents); + + let out = match self.flows.entry(*partner_flow) { + // Miss, but existing partner. + Entry::Occupied(partner) => Arc::new(FlowStat { + dir, + partner: *partner_flow, + parents, + bases, + shared: partner.get().shared.clone(), + last_hit: Moment::now().raw().into(), + }), + // Miss, no partner. 
+ Entry::Vacant(_) => { + Arc::new(FlowStat { + dir, + partner: *partner_flow, + parents, + bases, + shared: Arc::new(SharedFlowStat { + stats: self.pkt_counter(), + // TODO + tcp: None, + first_dir: dir, + }), + last_hit: Moment::now().raw().into(), + }) + } + }; + + self.flows + .insert(*flow_id, out) + .expect("Proven a miss on flow_id already") + .clone() + } +} + +fn get_base_ids(parents: &[Arc]) -> BTreeSet { + let mut out = BTreeSet::new(); + + let mut work_set = parents.to_vec(); + while let Some(el) = work_set.pop() { + work_set.extend_from_slice(&el.parents); + if let Some(id) = el.id { + out.insert(id); + } + } + + out +} From 3c2750d3132b1ca96cd369f29115dca10efb0aee Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 1 May 2025 11:48:14 +0100 Subject: [PATCH 02/37] More primitives for Doing The Work. --- crates/opte-api/src/stat.rs | 6 +- lib/opte/src/engine/port/mod.rs | 11 ++ lib/opte/src/engine/stat.rs | 326 ++++++++++++++++++++++++-------- 3 files changed, 258 insertions(+), 85 deletions(-) diff --git a/crates/opte-api/src/stat.rs b/crates/opte-api/src/stat.rs index 0f847802..f060ee10 100644 --- a/crates/opte-api/src/stat.rs +++ b/crates/opte-api/src/stat.rs @@ -12,7 +12,7 @@ use serde::Deserialize; use serde::Serialize; use uuid::Uuid; -#[derive(Deserialize, Serialize)] +#[derive(Deserialize, Serialize, Debug, Clone)] pub struct FlowStat { pub partner: FlowId, pub dir: Direction, @@ -20,7 +20,7 @@ pub struct FlowStat { pub stats: PacketCounter, } -#[derive(Deserialize, Serialize)] +#[derive(Deserialize, Serialize, Debug, Clone, Copy)] pub struct PacketCounter { pub pkts_in: u64, pub bytes_in: u64, @@ -28,7 +28,7 @@ pub struct PacketCounter { pub bytes_out: u64, } -#[derive(Deserialize, Serialize)] +#[derive(Deserialize, Serialize, Debug, Clone, Copy)] pub struct FullCounter { pub allow: u64, pub deny: u64, diff --git a/lib/opte/src/engine/port/mod.rs b/lib/opte/src/engine/port/mod.rs index 7b5015a6..f7373e26 100644 --- 
a/lib/opte/src/engine/port/mod.rs +++ b/lib/opte/src/engine/port/mod.rs @@ -44,6 +44,7 @@ use super::rule::HdrTransform; use super::rule::HdrTransformError; use super::rule::Rule; use super::rule::TransformFlags; +use super::stat::Action as StatAction; use super::stat::StatTree; use super::tcp::KEEPALIVE_EXPIRE_TTL; use super::tcp::TIME_WAIT_EXPIRE_TTL; @@ -199,6 +200,16 @@ enum InternalProcessResult { Hairpin(MsgBlk), } +impl InternalProcessResult { + fn stat_action(&self) -> StatAction { + match self { + Self::Modified => StatAction::Allow, + Self::Drop { .. } => StatAction::Deny, + Self::Hairpin(..) => StatAction::Hairpin, + } + } +} + impl From for InternalProcessResult { fn from(hpa: HdlPktAction) -> Self { match hpa { diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index 2afa2595..45b50299 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -25,10 +25,30 @@ use opte_api::PacketCounter as ApiPktCounter; use opte_api::TcpState; use uuid::Uuid; +// TODO EXPIRY +// TODO DELETION + /// Opaque identifier for tracking unique stat objects. -#[derive(Copy, Clone, Hash, PartialEq, PartialOrd, Eq, Ord)] +#[derive(Copy, Clone, Hash, PartialEq, PartialOrd, Eq, Ord, Debug)] pub struct StatId(u64); +impl StatId { + fn new(val: &mut u64) -> Self { + let out = *val; + *val += 1; + StatId(out) + } +} + +/// Reduced form of an action for stats tracking purposes. +#[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Ord, Eq, Hash, Default)] +pub enum Action { + #[default] + Allow, + Deny, + Hairpin, +} + pub struct FlowStat { /// The direction of this flow half. 
pub dir: Direction, @@ -46,6 +66,17 @@ pub struct FlowStat { pub last_hit: AtomicU64, } +impl FlowStat { + pub fn hit(&self, pkt_size: u64) { + self.hit_at(pkt_size, Moment::now()); + } + + pub fn hit_at(&self, pkt_size: u64, time: Moment) { + self.last_hit.store(time.raw(), Ordering::Relaxed); + self.shared.stats.hit(self.dir, pkt_size); + } +} + pub struct SharedFlowStat { /// Actual stats associated with this flow. pub stats: PacketCounter, @@ -77,12 +108,51 @@ pub struct TableStat { pub children: KRwLock>>, /// The actual stats! - pub stats: Arc, + pub stats: FullCounter, /// When was this flow last updated? pub last_hit: AtomicU64, } +impl TableStat { + /// Allow a packet which will track local stats via a UFT entry. + pub fn allow(&self) { + self.allow_at(Moment::now()); + } + + /// Allow a packet (at a given timestamp) which will track local stats via + /// a UFT entry. + pub fn allow_at(&self, time: Moment) { + self.last_hit.store(time.raw(), Ordering::Relaxed); + self.stats.allow.fetch_add(1, Ordering::Relaxed); + } + + /// Record an action for a packet which will ultimately be dropped or + /// hairpinned. + pub fn act(&self, action: Action, pkt_size: u64, direction: Direction) { + self.act_at(action, pkt_size, direction, Moment::now()); + } + + /// Record an action for a packet (at a given time) which will ultimately + /// be dropped or hairpinned. 
+ pub fn act_at( + &self, + action: Action, + pkt_size: u64, + direction: Direction, + time: Moment, + ) { + self.last_hit.store(time.raw(), Ordering::Relaxed); + self.stats.packets.hit(direction, pkt_size); + let stat = match action { + Action::Allow => &self.stats.allow, + Action::Deny => &self.stats.deny, + Action::Hairpin => &self.stats.hairpin, + }; + stat.fetch_add(1, Ordering::Relaxed); + } +} + pub struct PacketCounter { pub id: StatId, @@ -92,6 +162,45 @@ pub struct PacketCounter { pub bytes_out: AtomicU64, } +impl PacketCounter { + fn from_next_id(id: &mut u64) -> PacketCounter { + PacketCounter { + id: StatId::new(id), + pkts_in: 0.into(), + bytes_in: 0.into(), + pkts_out: 0.into(), + bytes_out: 0.into(), + } + } + + #[inline] + fn hit(&self, direction: Direction, pkt_size: u64) { + let (pkts, bytes) = match direction { + Direction::In => (&self.pkts_in, &self.bytes_in), + Direction::Out => (&self.pkts_out, &self.bytes_out), + }; + pkts.fetch_add(1, Ordering::Relaxed); + bytes.fetch_add(pkt_size, Ordering::Relaxed); + } + + fn combine(&self, into: &Self) { + into.pkts_in + .fetch_add(self.pkts_in.load(Ordering::Relaxed), Ordering::Relaxed); + into.bytes_in.fetch_add( + self.bytes_in.load(Ordering::Relaxed), + Ordering::Relaxed, + ); + into.pkts_out.fetch_add( + self.pkts_out.load(Ordering::Relaxed), + Ordering::Relaxed, + ); + into.bytes_out.fetch_add( + self.bytes_out.load(Ordering::Relaxed), + Ordering::Relaxed, + ); + } +} + impl From<&PacketCounter> for ApiPktCounter { fn from(val: &PacketCounter) -> Self { ApiPktCounter { @@ -110,6 +219,32 @@ pub struct FullCounter { pub packets: PacketCounter, } +impl FullCounter { + fn from_next_id(id: &mut u64) -> FullCounter { + FullCounter { + allow: 0.into(), + deny: 0.into(), + hairpin: 0.into(), + packets: PacketCounter::from_next_id(id), + } + } + + fn combine(&self, into: &Self) { + into.packets.combine(&self.packets); + into.allow + .fetch_add(self.allow.load(Ordering::Relaxed), Ordering::Relaxed); + 
into.deny + .fetch_add(self.deny.load(Ordering::Relaxed), Ordering::Relaxed); + into.hairpin + .fetch_add(self.hairpin.load(Ordering::Relaxed), Ordering::Relaxed); + } + + #[inline] + fn id(&self) -> StatId { + self.packets.id + } +} + impl From<&FullCounter> for ApiFullCounter { fn from(val: &FullCounter) -> Self { ApiFullCounter { @@ -141,42 +276,6 @@ impl FoldStat for TableStat { } } -impl PacketCounter { - fn combine(&self, into: &Self) { - into.pkts_in - .fetch_add(self.pkts_in.load(Ordering::Relaxed), Ordering::Relaxed); - into.bytes_in.fetch_add( - self.bytes_in.load(Ordering::Relaxed), - Ordering::Relaxed, - ); - into.pkts_out.fetch_add( - self.pkts_out.load(Ordering::Relaxed), - Ordering::Relaxed, - ); - into.bytes_out.fetch_add( - self.bytes_out.load(Ordering::Relaxed), - Ordering::Relaxed, - ); - } -} - -impl FullCounter { - fn combine(&self, into: &Self) { - into.packets.combine(&self.packets); - into.allow - .fetch_add(self.allow.load(Ordering::Relaxed), Ordering::Relaxed); - into.deny - .fetch_add(self.deny.load(Ordering::Relaxed), Ordering::Relaxed); - into.hairpin - .fetch_add(self.hairpin.load(Ordering::Relaxed), Ordering::Relaxed); - } - - #[inline] - fn id(&self) -> StatId { - self.packets.id - } -} - /// Tracking/handling of all stats. /// /// ?? Describe? @@ -189,53 +288,31 @@ pub struct StatTree { } impl StatTree { - fn get_id(&mut self) -> StatId { - let out = self.next_id; - self.next_id += 1; - StatId(out) - } - - fn pkt_counter(&mut self) -> PacketCounter { - PacketCounter { - id: self.get_id(), - pkts_in: 0.into(), - bytes_in: 0.into(), - pkts_out: 0.into(), - bytes_out: 0.into(), - } - } - - fn full_counter(&mut self) -> FullCounter { - FullCounter { - allow: 0.into(), - deny: 0.into(), - hairpin: 0.into(), - packets: self.pkt_counter(), - } - } - pub fn new_root(&mut self) -> Arc { // TODO: RNG in illumos kernel? 
let uuid = Uuid::from_u64_pair(0, self.next_id); - self.new_root_with_id(uuid) + self.root(uuid) } - pub fn new_root_with_id(&mut self, uuid: Uuid) -> Arc { - let mut children = KRwLock::new(vec![]); - children.init(KRwLockType::Driver); - - let out = Arc::new(TableStat { - id: Some(uuid), - parents: vec![], - children, - stats: self.full_counter().into(), - last_hit: Moment::now().raw().into(), - }); - - // TODO: what if already exists?! - let _ = self.roots.insert(uuid, out.clone()); - - out + /// Gets or creates the root stat for a given UUID. + pub fn root(&mut self, uuid: Uuid) -> Arc { + let ids = &mut self.next_id; + + self.roots + .entry(uuid) + .or_insert_with_key(|id| { + let mut children = KRwLock::new(vec![]); + children.init(KRwLockType::Driver); + + Arc::new(TableStat { + id: Some(*id), + parents: vec![], + children, + stats: FullCounter::from_next_id(ids), + last_hit: Moment::now().raw().into(), + }) + }) + .clone() } pub fn new_intermediate( @@ -249,7 +326,7 @@ impl StatTree { id: None, parents, children, - stats: self.full_counter().into(), + stats: FullCounter::from_next_id(&mut self.next_id), last_hit: Moment::now().raw().into(), }); @@ -273,6 +350,8 @@ impl StatTree { ) -> Arc { if let Entry::Occupied(e) = self.flows.entry(*flow_id) { // TODO: what to do with (maybe new) parents & bases?! + // I *think* these should win out, insert, and preserve + // the old stats. Need to think about it. 
return e.get().clone(); } @@ -296,7 +375,7 @@ impl StatTree { parents, bases, shared: Arc::new(SharedFlowStat { - stats: self.pkt_counter(), + stats: PacketCounter::from_next_id(&mut self.next_id), // TODO tcp: None, first_dir: dir, @@ -311,6 +390,30 @@ impl StatTree { .expect("Proven a miss on flow_id already") .clone() } + + #[cfg(test)] + pub fn dump(&self) -> String { + let mut out = String::new(); + out.push_str("Roots\n"); + for (id, root) in &self.roots { + let d = ApiFullCounter::from(root.stats.as_ref()); + out.push_str(&format!("\t{:?}/{id} -> {d:?}\n", root.stats.id())); + } + out.push_str("Ints\n"); + for root in &self.intermediate { + let d = ApiFullCounter::from(root.stats.as_ref()); + out.push_str(&format!("\t{:?} -> {d:?}\n", root.stats.id())); + } + out.push_str("Flows\n"); + for (id, stat) in &self.flows { + let d: ApiFlowStat = stat.as_ref().into(); + out.push_str(&format!( + "\t{}/{}/{:?} -> {d:?}\n", + id, stat.dir, stat.shared.stats.id + )); + } + out + } } fn get_base_ids(parents: &[Arc]) -> BTreeSet { @@ -326,3 +429,62 @@ fn get_base_ids(parents: &[Arc]) -> BTreeSet { out } + +/// XXX holds stats as they arrive on a packet. +pub struct FlowStatBuilder { + parents: Vec>, + layer_end: usize, +} + +impl FlowStatBuilder { + pub fn new() -> Self { + Self { + // TODO: do we want this cfg'able? + parents: Vec::with_capacity(16), + layer_end: 0, + } + } + + /// Push a parent onto this flow. + pub fn push(&mut self, parent: Arc) { + self.parents.push(parent); + } + + /// Mark all current parents as [`Action::Allow`]. + pub fn end_layer(&mut self) { + self.layer_end = self.parents.len(); + } + + /// Return a list of stat parents if this packet is bound for flow creation. 
+ pub fn terminate( + self, + action: Action, + pkt_size: u64, + direction: Direction, + ) -> Option>> { + match action { + Action::Allow => { + self.parents.iter().for_each(|v| v.allow()); + Some(self.parents) + } + Action::Deny | Action::Hairpin => { + let (accepted, last_layer) = + self.parents.split_at(self.layer_end); + accepted + .iter() + .for_each(|v| v.act(Action::Allow, pkt_size, direction)); + last_layer + .iter() + .for_each(|v| v.act(action, pkt_size, direction)); + + None + } + } + } +} + +impl Default for FlowStatBuilder { + fn default() -> Self { + Self::new() + } +} From 9cefaebe047b36ac22544f2ed1b70e2a6db70801 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 2 May 2025 12:05:55 +0100 Subject: [PATCH 03/37] No LFTs yet, no expiry. Rather a lot of plumbing --- lib/opte-test-utils/src/lib.rs | 6 +- lib/opte/src/engine/layer.rs | 122 ++++++++--- lib/opte/src/engine/packet.rs | 9 +- lib/opte/src/engine/port/mod.rs | 230 +++++++++++++-------- lib/opte/src/engine/rule.rs | 32 ++- lib/opte/src/engine/stat.rs | 44 ++-- lib/oxide-vpc/src/engine/firewall.rs | 42 ++-- lib/oxide-vpc/src/engine/gateway/arp.rs | 11 +- lib/oxide-vpc/src/engine/gateway/dhcp.rs | 8 +- lib/oxide-vpc/src/engine/gateway/dhcpv6.rs | 6 +- lib/oxide-vpc/src/engine/gateway/icmp.rs | 6 +- lib/oxide-vpc/src/engine/gateway/icmpv6.rs | 10 +- lib/oxide-vpc/src/engine/gateway/mod.rs | 40 ++-- lib/oxide-vpc/src/engine/nat.rs | 8 +- lib/oxide-vpc/src/engine/overlay.rs | 10 +- lib/oxide-vpc/src/engine/router.rs | 8 +- 16 files changed, 384 insertions(+), 208 deletions(-) diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index f3eac140..a7a340cb 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -269,11 +269,11 @@ fn oxide_net_builder( let dhcp = base_dhcp_config(); firewall::setup(&mut pb, fw_limit).expect("failed to add firewall layer"); - gateway::setup(&pb, cfg, vpc_map, fw_limit, &dhcp) + gateway::setup(&mut pb, cfg, vpc_map, 
fw_limit, &dhcp) .expect("failed to setup gateway layer"); - router::setup(&pb, cfg, one_limit).expect("failed to add router layer"); + router::setup(&mut pb, cfg, one_limit).expect("failed to add router layer"); nat::setup(&mut pb, cfg, snat_limit).expect("failed to add nat layer"); - overlay::setup(&pb, cfg, v2p, v2b, one_limit) + overlay::setup(&mut pb, cfg, v2p, v2b, one_limit) .expect("failed to add overlay layer"); pb } diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index bc664e6b..2671dc17 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -17,6 +17,7 @@ use super::packet::InnerFlowId; use super::packet::MblkFullParsed; use super::packet::MblkPacketData; use super::packet::Packet; +use super::port::PortBuilder; use super::port::Transforms; use super::port::meta::ActionMeta; use super::rule; @@ -28,6 +29,8 @@ use super::rule::GenBtError; use super::rule::HdrTransformError; use super::rule::Rule; use super::rule::ht_probe; +use super::stat::StatTree; +use super::stat::TableStat; use crate::ExecCtx; use crate::LogLevel; use crate::api::DumpLayerResp; @@ -56,6 +59,7 @@ use opte_api::Direction; use opte_api::RuleDump; use opte_api::RuleId; use opte_api::RuleTableEntryDump; +use uuid::Uuid; #[derive(Debug)] pub enum LayerError { @@ -353,8 +357,9 @@ pub enum EntryState { /// reasonable to open this up to be any [`Action`], if such a use /// case were to present itself. For now, we stay conservative, and /// supply only what the current consumers need. -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, Default)] pub enum DefaultAction { + #[default] Allow, StatefulAllow, Deny, @@ -407,7 +412,7 @@ impl Display for ActionDescEntry { /// /// This describes the actions a layer's rules can take as well as the /// [`DefaultAction`] to take when a rule doesn't match. -#[derive(Debug)] +#[derive(Debug, Default)] pub struct LayerActions { /// The list of actions shared among the layer's rules. 
An action /// doesn't have to be shared, each rule is free to create its @@ -419,9 +424,15 @@ pub struct LayerActions { /// direction. pub default_in: DefaultAction, + /// The stats ID to attach to the default-in action. + pub default_in_stat_id: Option, + /// The default action to take if no rule matches in the outbound /// direction. pub default_out: DefaultAction, + + /// The stats ID to attach to the default-out action. + pub default_out_stat_id: Option, } #[derive(KStatProvider)] @@ -503,8 +514,10 @@ pub struct Layer { actions: Vec, default_in: DefaultAction, default_in_hits: u64, + default_in_stat: Arc, default_out: DefaultAction, default_out_hits: u64, + default_out_stat: Arc, ft: LayerFlowTable, ft_cstr: CString, rules_in: RuleTable, @@ -518,15 +531,20 @@ impl Layer { self.actions.get(idx).cloned() } - pub fn add_rule(&mut self, dir: Direction, rule: Rule) { + pub fn add_rule( + &mut self, + dir: Direction, + rule: Rule, + stats: &mut StatTree, + ) { match dir { Direction::Out => { - self.rules_out.add(rule); + self.rules_out.add(rule, stats); self.stats.vals.out_rules += 1; } Direction::In => { - self.rules_in.add(rule); + self.rules_in.add(rule, stats); self.stats.vals.in_rules += 1; } } @@ -737,18 +755,24 @@ impl Layer { pub fn new( name: &'static str, - port: &str, + port: &mut PortBuilder, actions: LayerActions, ft_limit: NonZeroU32, ) -> Self { - let port_c = CString::new(port).unwrap(); + let stats = port.stats_mut(); + let default_in_stat = stats.root(actions.default_in_stat_id); + let default_out_stat = stats.root(actions.default_out_stat_id); + + let port_name = port.name(); + + let port_c = CString::new(port_name).unwrap(); let name_c = CString::new(name).unwrap(); // Unwrap: We know this is fine because the stat names are // generated from the LayerStats structure. 
let stats = KStatNamed::new( "xde", - &format!("{}_{}", port, name), + &format!("{}_{}", port_name, name), LayerStats::new(), ) .unwrap(); @@ -759,15 +783,17 @@ impl Layer { actions: actions.actions, default_in: actions.default_in, default_in_hits: 0, + default_in_stat, default_out: actions.default_out, default_out_hits: 0, + default_out_stat, name, name_c, port_c, - ft: LayerFlowTable::new(port, name, ft_limit), + ft: LayerFlowTable::new(port_name, name, ft_limit), ft_cstr: CString::new(format!("ft-{}", name)).unwrap(), - rules_in: RuleTable::new(port, name, Direction::In), - rules_out: RuleTable::new(port, name, Direction::Out), + rules_in: RuleTable::new(port_name, name, Direction::In), + rules_out: RuleTable::new(port_name, name, Direction::Out), rt_cstr: CString::new(format!("rt-{}", name)).unwrap(), stats, } @@ -795,12 +821,12 @@ impl Layer { xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { - use Direction::*; let flow_before = *pkt.flow(); self.layer_process_entry_probe(dir, pkt.flow()); + pkt.meta_mut().stats.new_layer(); let res = match dir { - Out => self.process_out(ectx, pkt, xforms, ameta), - In => self.process_in(ectx, pkt, xforms, ameta), + Direction::Out => self.process_out(ectx, pkt, xforms, ameta), + Direction::In => self.process_in(ectx, pkt, xforms, ameta), }; self.layer_process_return_probe(dir, &flow_before, pkt.flow(), &res); res @@ -888,15 +914,21 @@ impl Layer { self.stats.vals.in_lft_miss += 1; let rule = self.rules_in.find_match(pkt.flow(), pkt.meta(), ameta); - let action = if let Some(rule) = rule { + let (action, stat) = if let Some(rule) = rule { self.stats.vals.in_rule_match += 1; - rule.action() + (rule.rule.action(), rule.stat.clone()) } else { self.stats.vals.in_rule_nomatch += 1; self.default_in_hits += 1; - self.default_in.into() + (self.default_in.into(), self.default_in_stat.clone()) }; + // No LFT to account for. 
+ let mut stat = Some(stat); + if !matches!(action, Action::StatefulAllow | Action::Stateful(_)) { + pkt.meta_mut().stats.push(stat.take().unwrap()); + } + match action { Action::Allow => Ok(LayerResult::Allow), @@ -909,6 +941,8 @@ impl Layer { }); } + // TODO: how on earth are we plumbing StatTree into here?? + // The outbound flow ID mirrors the inbound. Remember, // the "top" of layer represents how the client sees // the traffic, and the "bottom" of the layer @@ -1019,6 +1053,8 @@ impl Layer { }); } + // TODO: how on earth are we plumbing StatTree into here?? + let desc = match action.gen_desc(pkt.flow(), pkt, ameta) { Ok(aord) => match aord { AllowOrDeny::Allow(desc) => desc, @@ -1175,15 +1211,21 @@ impl Layer { self.stats.vals.out_lft_miss += 1; let rule = self.rules_out.find_match(pkt.flow(), pkt.meta(), ameta); - let action = if let Some(rule) = rule { + let (action, stat) = if let Some(rule) = rule { self.stats.vals.out_rule_match += 1; - rule.action() + (rule.rule.action(), rule.stat.clone()) } else { self.stats.vals.out_rule_nomatch += 1; self.default_out_hits += 1; - self.default_out.into() + (self.default_out.into(), self.default_out_stat.clone()) }; + // No LFT to account for. + let mut stat = Some(stat); + if !matches!(action, Action::StatefulAllow | Action::Stateful(_)) { + pkt.meta_mut().stats.push(stat.take().unwrap()); + } + match action { Action::Allow => Ok(LayerResult::Allow), @@ -1196,6 +1238,8 @@ impl Layer { }); } + // TODO: how on earth are we plumbing StatTree into here?? + // The inbound flow ID must be calculated _after_ the // header transformation. Remember, the "top" // (outbound) of layer represents how the client sees @@ -1308,6 +1352,8 @@ impl Layer { }); } + // TODO: how on earth are we plumbing StatTree into here?? 
+ let desc = match action.gen_desc(pkt.flow(), pkt, ameta) { Ok(aord) => match aord { AllowOrDeny::Allow(desc) => desc, @@ -1492,9 +1538,10 @@ impl Layer { &mut self, in_rules: Vec>, out_rules: Vec>, + stats: &mut StatTree, ) { self.ft.clear(); - self.set_rules_core(in_rules, out_rules); + self.set_rules_core(in_rules, out_rules, stats); } /// Set all rules at once without clearing the flow table. @@ -1505,18 +1552,20 @@ impl Layer { &mut self, in_rules: Vec>, out_rules: Vec>, + stats: &mut StatTree, ) { self.ft.mark_dirty(); - self.set_rules_core(in_rules, out_rules); + self.set_rules_core(in_rules, out_rules, stats); } fn set_rules_core( &mut self, in_rules: Vec>, out_rules: Vec>, + stats: &mut StatTree, ) { - self.rules_in.set_rules(in_rules); - self.rules_out.set_rules(out_rules); + self.rules_in.set_rules(in_rules, stats); + self.rules_out.set_rules(out_rules, stats); self.stats.vals.set_rules_called += 1; self.stats.vals.in_rules.set(self.rules_in.num_rules() as u64); self.stats.vals.out_rules.set(self.rules_out.num_rules() as u64); @@ -1532,6 +1581,7 @@ struct RuleTableEntry { id: RuleId, hits: u64, rule: Rule, + stat: Arc, } impl From<&RuleTableEntry> for RuleTableEntryDump { @@ -1561,15 +1611,18 @@ pub enum RuleRemoveErr { } impl RuleTable { - fn add(&mut self, rule: Rule) { + fn add(&mut self, rule: Rule, stats: &mut StatTree) { + let stat = stats.root(rule.stat_id().copied()); match self.find_pos(&rule) { RulePlace::End => { - let rte = RuleTableEntry { id: self.next_id, hits: 0, rule }; + let rte = + RuleTableEntry { id: self.next_id, hits: 0, rule, stat }; self.rules.push(rte); } RulePlace::Insert(idx) => { - let rte = RuleTableEntry { id: self.next_id, hits: 0, rule }; + let rte = + RuleTableEntry { id: self.next_id, hits: 0, rule, stat }; self.rules.insert(idx, rte); } } @@ -1589,7 +1642,7 @@ impl RuleTable { ifid: &InnerFlowId, pmeta: &MblkPacketData, ameta: &ActionMeta, - ) -> Option<&Rule> { + ) -> Option<&RuleTableEntry> { for rte in 
self.rules.iter_mut() { if rte.rule.is_match(pmeta, ameta) { rte.hits += 1; @@ -1600,7 +1653,7 @@ impl RuleTable { ifid, &rte.rule, ); - return Some(&rte.rule); + return Some(rte); } } @@ -1738,10 +1791,14 @@ impl RuleTable { } } - pub fn set_rules(&mut self, new_rules: Vec>) { + pub fn set_rules( + &mut self, + new_rules: Vec>, + stats: &mut StatTree, + ) { self.rules.clear(); for r in new_rules { - self.add(r); + self.add(r, stats); } } } @@ -1825,6 +1882,7 @@ mod test { use crate::engine::predicate::Predicate; use crate::engine::rule; + let mut stats = StatTree::default(); let mut rule_table = RuleTable::new("port", "test", Direction::Out); let mut rule = Rule::new( 1, @@ -1835,7 +1893,7 @@ mod test { Ipv4AddrMatch::Prefix(cidr), ])); - rule_table.add(rule.finalize()); + rule_table.add(rule.finalize(), &mut stats); let mut test_pkt = MsgBlk::new_ethernet_pkt(( Ethernet { ethertype: Ethertype::IPV4, ..Default::default() }, diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 46c259cf..f20247cc 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -33,6 +33,7 @@ use super::rule::CompiledEncap; use super::rule::CompiledTransform; use super::rule::HdrTransform; use super::rule::HdrTransformError; +use super::stat::FlowStatBuilder; pub use crate::api::AddrPair; pub use crate::api::FLOW_ID_DEFAULT; pub use crate::api::InnerFlowId; @@ -457,6 +458,7 @@ pub struct PacketData { pub(crate) headers: OpteMeta, initial_lens: Option>, body: PktBodyWalker, + pub(crate) stats: FlowStatBuilder, } impl From> for OpteMeta { @@ -768,7 +770,12 @@ where .into(), ); let body = PktBodyWalker::new(last_chunk, data); - let meta = Box::new(PacketData { headers, initial_lens, body }); + let meta = Box::new(PacketData { + headers, + initial_lens, + body, + stats: FlowStatBuilder::new(), + }); Packet { state: FullParsed { diff --git a/lib/opte/src/engine/port/mod.rs b/lib/opte/src/engine/port/mod.rs index f7373e26..a59c6bd2 100644 --- 
a/lib/opte/src/engine/port/mod.rs +++ b/lib/opte/src/engine/port/mod.rs @@ -45,6 +45,7 @@ use super::rule::HdrTransformError; use super::rule::Rule; use super::rule::TransformFlags; use super::stat::Action as StatAction; +use super::stat::FlowStat; use super::stat::StatTree; use super::tcp::KEEPALIVE_EXPIRE_TTL; use super::tcp::TIME_WAIT_EXPIRE_TTL; @@ -244,7 +245,8 @@ pub struct PortBuilder { // probes. name_cstr: CString, mac: MacAddr, - layers: KMutex>, + layers: Vec, + flow_stats: StatTree, } #[derive(Clone, Debug)] @@ -279,36 +281,34 @@ impl PortBuilder { /// a packet from the guest. The last is the last to see a packet /// before it is delivered to the guest. pub fn add_layer( - &self, + &mut self, new_layer: Layer, pos: Pos, ) -> result::Result<(), OpteError> { - let mut lock = self.layers.lock(); - match pos { Pos::Last => { - lock.push(new_layer); + self.layers.push(new_layer); return Ok(()); } Pos::First => { - lock.insert(0, new_layer); + self.layers.insert(0, new_layer); return Ok(()); } Pos::Before(name) => { - for (i, layer) in lock.iter().enumerate() { + for (i, layer) in self.layers.iter().enumerate() { if layer.name() == name { - lock.insert(i, new_layer); + self.layers.insert(i, new_layer); return Ok(()); } } } Pos::After(name) => { - for (i, layer) in lock.iter().enumerate() { + for (i, layer) in self.layers.iter().enumerate() { if layer.name() == name { - lock.insert(i + 1, new_layer); + self.layers.insert(i + 1, new_layer); return Ok(()); } } @@ -324,14 +324,14 @@ impl PortBuilder { /// Add a new `Rule` to the layer named by `layer`, if such a /// layer exists. Otherwise, return an error. 
pub fn add_rule( - &self, + &mut self, layer_name: &str, dir: Direction, rule: Rule, ) -> result::Result<(), OpteError> { - for layer in &mut *self.layers.lock() { + for layer in &mut self.layers { if layer.name() == layer_name { - layer.add_rule(dir, rule); + layer.add_rule(dir, rule, &mut self.flow_stats); return Ok(()); } } @@ -347,9 +347,7 @@ impl PortBuilder { ) -> result::Result, PortCreateError> { let data = PortData { state: PortState::Ready, - // At this point the layer pipeline is immutable, thus we - // move the layers out of the mutex. - layers: self.layers.into_inner(), + layers: self.layers, uft_in: FlowTable::new(&self.name, "uft_in", uft_limit, None), uft_out: FlowTable::new(&self.name, "uft_out", uft_limit, None), tcp_flows: FlowTable::new( @@ -358,6 +356,7 @@ impl PortBuilder { tcp_limit, Some(Box::::default()), ), + flow_stats: self.flow_stats, }; let mut data = KRwLock::new(data); @@ -372,8 +371,6 @@ impl PortBuilder { stats: KStatNamed::new("xde", &self.name, PortStats::new())?, net, data, - - flow_stats: Default::default(), }) } @@ -381,7 +378,7 @@ impl PortBuilder { /// [`Layer`] at the given index. If the layer does not exist, or /// has no action at that index, then `None` is returned. pub fn layer_action(&self, layer: &str, idx: usize) -> Option { - for l in &*self.layers.lock() { + for l in &self.layers { if l.name() == layer { return l.action(idx); } @@ -393,9 +390,8 @@ impl PortBuilder { /// List each [`Layer`] under this port. pub fn list_layers(&self) -> ListLayersResp { let mut tmp = vec![]; - let lock = self.layers.lock(); - for layer in lock.iter() { + for layer in self.layers.iter() { tmp.push(LayerDesc { name: layer.name().to_string(), rules_in: layer.num_rules(Direction::In), @@ -425,22 +421,26 @@ impl PortBuilder { name_cstr, mac, ectx, - layers: KMutex::new(Vec::new()), + layers: vec![], + flow_stats: Default::default(), } } /// Remove the [`Layer`] registered under `name`, if such a layer /// exists. 
- pub fn remove_layer(&self, name: &str) { - let mut lock = self.layers.lock(); - - for (i, layer) in lock.iter().enumerate() { + pub fn remove_layer(&mut self, name: &str) { + for (i, layer) in self.layers.iter().enumerate() { if layer.name() == name { - let _ = lock.remove(i); + let _ = self.layers.remove(i); return; } } } + + /// Provide access to the inner [`StatTree`]. + pub fn stats_mut(&mut self) -> &mut StatTree { + &mut self.flow_stats + } } /// The current state of the [`Port`]. @@ -565,6 +565,8 @@ pub struct UftEntry { /// Cached reference to a flow's TCP state, if applicable. /// This allows us to maintain up-to-date TCP flow table info tcp_flow: Option>>, + + stat: Arc, } impl Dump for UftEntry { @@ -597,7 +599,8 @@ impl Display for UftEntry { impl fmt::Debug for UftEntry { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let UftEntry { pair: _pair, xforms, l4_hash, epoch, tcp_flow } = self; + let UftEntry { pair: _pair, xforms, l4_hash, epoch, tcp_flow, stat } = + self; f.debug_struct("UftEntry") .field("pair", &"") @@ -605,6 +608,7 @@ impl fmt::Debug for UftEntry { .field("l4_hash", l4_hash) .field("epoch", epoch) .field("tcp_flow", tcp_flow) + .field("stats", &"TODO") .finish() } } @@ -702,6 +706,8 @@ struct PortData { // that we know which inbound UFT/FT entries to retire upon // connection termination. tcp_flows: FlowTable, + + flow_stats: StatTree, } /// A virtual switch port. @@ -769,8 +775,6 @@ pub struct Port { stats: KStatNamed, net: N, data: KRwLock, - - flow_stats: StatTree, } // Convert: @@ -901,10 +905,12 @@ impl Port { let mut data = self.data.write(); check_state!(data.state, [PortState::Ready, PortState::Running])?; - for layer in &mut data.layers { + let PortData { layers, flow_stats, .. 
} = &mut (*data); + + for layer in layers { if layer.name() == layer_name { self.epoch.fetch_add(1, SeqCst); - layer.add_rule(dir, rule); + layer.add_rule(dir, rule, flow_stats); return Ok(()); } } @@ -1253,6 +1259,7 @@ impl Port { let process_start = Moment::now(); let flow_before = pkt.flow(); let mblk_addr = pkt.mblk_addr(); + let pkt_len = pkt.len() as u64; // Packet processing is split into a few mechanisms based on // expected speed, based on actions and the size of required metadata: @@ -1354,6 +1361,8 @@ impl Port { // The Fast Path. drop(lock.take()); let xforms = &entry.state().xforms; + entry.state().stat.hit_at(pkt_len, process_start); + let out = if xforms.compiled.is_some() { FastPathDecision::CompiledUft(entry) } else { @@ -1419,7 +1428,7 @@ impl Port { self.name_cstr.as_c_str(), tcp, dir, - pkt.len() as u64, + pkt_len, ufid_in, ) { Ok(TcpState::Closed) => Some(Arc::clone(tcp_flow)), @@ -1686,10 +1695,12 @@ impl Port { let mut data = self.data.write(); check_state!(data.state, [PortState::Ready, PortState::Running])?; - for layer in &mut data.layers { + let PortData { layers, flow_stats, .. } = &mut (*data); + + for layer in layers { if layer.name() == layer_name { self.epoch.fetch_add(1, SeqCst); - layer.set_rules(in_rules, out_rules); + layer.set_rules(in_rules, out_rules, flow_stats); return Ok(()); } } @@ -1707,10 +1718,12 @@ impl Port { let mut data = self.data.write(); check_state!(data.state, [PortState::Ready, PortState::Running])?; - for layer in &mut data.layers { + let PortData { layers, flow_stats, .. 
} = &mut (*data); + + for layer in layers { if layer.name() == layer_name { self.epoch.fetch_add(1, SeqCst); - layer.set_rules_soft(in_rules, out_rules); + layer.set_rules_soft(in_rules, out_rules, flow_stats); return Ok(()); } } @@ -2346,36 +2359,48 @@ impl Port { self.stats.vals.in_uft_miss.incr(1); let mut xforms = Transforms::new(); let res = self.layers_process(data, In, pkt, &mut xforms, ameta); - match res { + + let (ipr, create_flow) = match res { Ok(LayerResult::Allow) => { // If there is no flow ID, then do not create a UFT // entry. - if *ufid_in == FLOW_ID_DEFAULT { - return Ok(InternalProcessResult::Modified); - } + (InternalProcessResult::Modified, *ufid_in != FLOW_ID_DEFAULT) } - Ok(LayerResult::Deny { name, reason }) => { - return Ok(InternalProcessResult::Drop { + Ok(LayerResult::Deny { name, reason }) => ( + InternalProcessResult::Drop { reason: DropReason::Layer { name, reason }, - }); - } + }, + false, + ), Ok(LayerResult::Hairpin(hppkt)) => { - return Ok(InternalProcessResult::Hairpin(hppkt)); + (InternalProcessResult::Hairpin(hppkt), false) } - Ok(LayerResult::HandlePkt) => { - return Ok(InternalProcessResult::from(self.net.handle_pkt( + Ok(LayerResult::HandlePkt) => ( + InternalProcessResult::from(self.net.handle_pkt( In, pkt, &data.uft_in, &data.uft_out, - )?)); - } + )?), + false, + ), + // TODO: Errors as a decision?! 
Err(e) => return Err(ProcessError::Layer(e)), - } + }; + + let pkt_len = pkt.len() as u64; + let Some(stat_parents) = pkt.meta_mut().stats.terminate( + ipr.stat_action(), + pkt_len, + In, + create_flow, + ) else { + return Ok(ipr); + }; let mut flags = TransformFlags::empty(); if pkt.checksums_dirty() { @@ -2386,12 +2411,15 @@ impl Port { } let ufid_out = pkt.flow().mirror(); + let stat = + data.flow_stats.new_flow(ufid_in, &ufid_out, In, stat_parents); let mut hte = UftEntry { pair: KMutex::new(Some(ufid_out)), xforms: xforms.compile(flags), epoch, l4_hash: ufid_in.crc32(), tcp_flow: None, + stat, }; // Keep around the comment on the `None` arm @@ -2421,12 +2449,7 @@ impl Port { // For inbound traffic the TCP flow table must be // checked _after_ processing take place. if pkt.meta().is_inner_tcp() { - match self.process_in_tcp( - data, - pkt.meta(), - ufid_in, - pkt.len() as u64, - ) { + match self.process_in_tcp(data, pkt.meta(), ufid_in, pkt_len) { Ok(TcpMaybeClosed::Closed { .. }) => { Ok(InternalProcessResult::Modified) } @@ -2551,6 +2574,8 @@ impl Port { self.stats.vals.out_uft_miss.incr(1); let mut tcp_closed = false; + let pkt_len = pkt.len() as u64; + // For outbound traffic the TCP flow table must be checked // _before_ processing take place. let tcp_flow = if pkt.meta().is_inner_tcp() { @@ -2558,7 +2583,7 @@ impl Port { data, pkt.flow(), pkt.meta(), - pkt.len() as u64, + pkt_len, ) { Ok(TcpMaybeClosed::Closed { ufid_inbound }) => { tcp_closed = true; @@ -2617,46 +2642,75 @@ impl Port { flags |= TransformFlags::INTERNAL_DESTINATION; } + let (ipr, create_flow) = match res { + Ok(LayerResult::Allow) => { + // If there is no flow ID, then do not create a UFT + // entry. 
+ ( + InternalProcessResult::Modified, + flow_before != FLOW_ID_DEFAULT && !tcp_closed, + ) + } + + Ok(LayerResult::Deny { name, reason }) => ( + InternalProcessResult::Drop { + reason: DropReason::Layer { name, reason }, + }, + false, + ), + + Ok(LayerResult::Hairpin(hppkt)) => { + (InternalProcessResult::Hairpin(hppkt), false) + } + + Ok(LayerResult::HandlePkt) => ( + InternalProcessResult::from(self.net.handle_pkt( + Out, + pkt, + &data.uft_in, + &data.uft_out, + )?), + false, + ), + + // TODO: Errors as a decision?! + Err(e) => return Err(ProcessError::Layer(e)), + }; + + let Some(stat_parents) = pkt.meta_mut().stats.terminate( + ipr.stat_action(), + pkt_len, + Out, + create_flow, + ) else { + return Ok(ipr); + }; + + let ufid_out = pkt.flow().mirror(); + let stat = data.flow_stats.new_flow( + &flow_before, + &ufid_out, + Out, + stat_parents, + ); + let hte = UftEntry { pair: KMutex::new(None), xforms: xforms.compile(flags), epoch, l4_hash: flow_before.crc32(), tcp_flow, + stat, }; - match res { - Ok(LayerResult::Allow) => { - // If there is no Flow ID, then there is no UFT entry. 
- if flow_before == FLOW_ID_DEFAULT || tcp_closed { - return Ok(InternalProcessResult::Modified); - } - match data.uft_out.add(flow_before, hte) { - Ok(_) => Ok(InternalProcessResult::Modified), - Err(OpteError::MaxCapacity(limit)) => { - Err(ProcessError::FlowTableFull { kind: "UFT", limit }) - } - Err(_) => unreachable!( - "Cannot return other errors from FlowTable::add" - ), - } - } - - Ok(LayerResult::Hairpin(hppkt)) => { - Ok(InternalProcessResult::Hairpin(hppkt)) + match data.uft_out.add(flow_before, hte) { + Ok(_) => Ok(InternalProcessResult::Modified), + Err(OpteError::MaxCapacity(limit)) => { + Err(ProcessError::FlowTableFull { kind: "UFT", limit }) } - - Ok(LayerResult::Deny { name, reason }) => { - Ok(InternalProcessResult::Drop { - reason: DropReason::Layer { name, reason }, - }) + Err(_) => { + unreachable!("Cannot return other errors from FlowTable::add") } - - Ok(LayerResult::HandlePkt) => Ok(InternalProcessResult::from( - self.net.handle_pkt(Out, pkt, &data.uft_in, &data.uft_out)?, - )), - - Err(e) => Err(ProcessError::Layer(e)), } } diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 9ce1030f..f9d9e0e6 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -68,6 +68,7 @@ use opte_api::Direction; use opte_api::RuleDump; use serde::Deserialize; use serde::Serialize; +use uuid::Uuid; use zerocopy::ByteSliceMut; /// A marker trait indicating a type is an entry acuired from a [`Resource`]. @@ -987,6 +988,7 @@ pub struct Rule { state: S, action: Action, priority: u16, + stat_id: Option, } impl PartialEq for Rule { @@ -1001,6 +1003,10 @@ impl Rule { pub fn action(&self) -> &Action { &self.action } + + pub fn stat_id(&self) -> Option<&Uuid> { + self.stat_id.as_ref() + } } impl Rule { @@ -1010,9 +1016,22 @@ impl Rule { /// any implicit predicates dictated by the action. Additional /// predicates may be added along with the action's implicit ones. 
pub fn new(priority: u16, action: Action) -> Self { + Rule::new_with_id(priority, action, None) + } + + pub fn new_with_id( + priority: u16, + action: Action, + stat_id: Option, + ) -> Self { let (hdr_preds, data_preds) = action.implicit_preds(); - Rule { state: Ready { hdr_preds, data_preds }, action, priority } + Rule { + state: Ready { hdr_preds, data_preds }, + action, + priority, + stat_id, + } } /// Create a new rule that matches anything. @@ -1023,7 +1042,15 @@ impl Rule { /// useful for making intentions clear that this rule is to match /// anything. pub fn match_any(priority: u16, action: Action) -> Rule { - Rule { state: Finalized { preds: None }, action, priority } + Rule::match_any_with_id(priority, action, None) + } + + pub fn match_any_with_id( + priority: u16, + action: Action, + stat_id: Option, + ) -> Rule { + Rule { state: Finalized { preds: None }, action, priority, stat_id } } /// Add a single [`Predicate`] to the end of the list. @@ -1069,6 +1096,7 @@ impl Rule { state: Finalized { preds }, priority: self.priority, action: self.action, + stat_id: self.stat_id, } } } diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index 45b50299..3742be95 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -114,6 +114,12 @@ pub struct TableStat { pub last_hit: AtomicU64, } +impl core::fmt::Debug for TableStat { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + todo!() + } +} + impl TableStat { /// Allow a packet which will track local stats via a UFT entry. pub fn allow(&self) { @@ -288,14 +294,11 @@ pub struct StatTree { } impl StatTree { - pub fn new_root(&mut self) -> Arc { - // TODO: RNG in illumos kernel? - let uuid = Uuid::from_u64_pair(0, self.next_id); - self.root(uuid) - } - /// Gets or creates the root stat for a given UUID. - pub fn root(&mut self, uuid: Uuid) -> Arc { + /// + /// Allocates a new UUID if none is provided. 
+ pub fn root(&mut self, uuid: Option) -> Arc { + let uuid = uuid.unwrap_or_else(|| Uuid::from_u64_pair(0, self.next_id)); let ids = &mut self.next_id; self.roots @@ -385,10 +388,9 @@ impl StatTree { } }; - self.flows - .insert(*flow_id, out) - .expect("Proven a miss on flow_id already") - .clone() + // Proven a miss on flow_id already + let _ = self.flows.insert(*flow_id, out.clone()); + out } #[cfg(test)] @@ -396,12 +398,12 @@ impl StatTree { let mut out = String::new(); out.push_str("Roots\n"); for (id, root) in &self.roots { - let d = ApiFullCounter::from(root.stats.as_ref()); + let d = ApiFullCounter::from(&root.stats); out.push_str(&format!("\t{:?}/{id} -> {d:?}\n", root.stats.id())); } out.push_str("Ints\n"); for root in &self.intermediate { - let d = ApiFullCounter::from(root.stats.as_ref()); + let d = ApiFullCounter::from(&root.stats); out.push_str(&format!("\t{:?} -> {d:?}\n", root.stats.id())); } out.push_str("Flows\n"); @@ -451,21 +453,29 @@ impl FlowStatBuilder { } /// Mark all current parents as [`Action::Allow`]. - pub fn end_layer(&mut self) { + pub fn new_layer(&mut self) { self.layer_end = self.parents.len(); } /// Return a list of stat parents if this packet is bound for flow creation. pub fn terminate( - self, + &mut self, action: Action, pkt_size: u64, direction: Direction, + create_flow: bool, ) -> Option>> { match action { - Action::Allow => { + Action::Allow if create_flow => { self.parents.iter().for_each(|v| v.allow()); - Some(self.parents) + // TODO: should *take*? + Some(self.parents.clone()) + } + Action::Allow => { + self.parents + .iter() + .for_each(|v| v.act(action, pkt_size, direction)); + None } Action::Deny | Action::Hairpin => { let (accepted, last_layer) = diff --git a/lib/oxide-vpc/src/engine/firewall.rs b/lib/oxide-vpc/src/engine/firewall.rs index 9c27b8ce..998163e9 100644 --- a/lib/oxide-vpc/src/engine/firewall.rs +++ b/lib/oxide-vpc/src/engine/firewall.rs @@ -2,7 +2,7 @@ // License, v. 2.0. 
If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2025 Oxide Computer Company //! The Oxide VPC firewall. //! @@ -48,7 +48,23 @@ pub fn setup( pb: &mut PortBuilder, ft_limit: NonZeroU32, ) -> Result<(), OpteError> { - let fw_layer = Firewall::create_layer(pb.name(), ft_limit); + // The inbound side of the firewall is a filtering layer, only + // traffic explicitly allowed should pass. By setting the + // default inbound action to deny we effectively implement the + // implied "implied deny inbound" rule as specified in RFD 63 + // §2.8.1. + // + // RFD 63 §2.8.1 also states that all outbound traffic should + // be allowed by default, aka the "implied allow outbound" + // rule. Therefore, we set the default outbound action to + // allow. + let actions = LayerActions { + default_in: DefaultAction::Deny, + default_out: DefaultAction::StatefulAllow, + ..Default::default() + }; + + let fw_layer = Layer::new(FW_LAYER_NAME, pb, actions, ft_limit); pb.add_layer(fw_layer, Pos::First) } @@ -124,28 +140,6 @@ pub fn from_fw_rule(fw_rule: FirewallRule, action: Action) -> Rule { rule.finalize() } -impl Firewall { - pub fn create_layer(port_name: &str, ft_limit: NonZeroU32) -> Layer { - // The inbound side of the firewall is a filtering layer, only - // traffic explicitly allowed should pass. By setting the - // default inbound action to deny we effectively implement the - // implied "implied deny inbound" rule as speficied in RFD 63 - // §2.8.1. - // - // RFD 63 §2.8.1 also states that all outbond traffic should - // be allowed by default, aka the "implied allow outbound" - // rule. Therefore, we set the default outbound action to - // allow.
- let actions = LayerActions { - actions: vec![], - default_in: DefaultAction::Deny, - default_out: DefaultAction::StatefulAllow, - }; - - Layer::new(FW_LAYER_NAME, port_name, actions, ft_limit) - } -} - impl ProtoFilter { pub fn into_predicate(self) -> Option { match self { diff --git a/lib/oxide-vpc/src/engine/gateway/arp.rs b/lib/oxide-vpc/src/engine/gateway/arp.rs index d530ce16..659bcb0e 100644 --- a/lib/oxide-vpc/src/engine/gateway/arp.rs +++ b/lib/oxide-vpc/src/engine/gateway/arp.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2025 Oxide Computer Company //! The ARP implementation of the Virtual Gateway. @@ -17,8 +17,13 @@ use opte::engine::predicate::EtherTypeMatch; use opte::engine::predicate::Predicate; use opte::engine::rule::Action; use opte::engine::rule::Rule; +use opte::engine::stat::StatTree; -pub fn setup(layer: &mut Layer, cfg: &VpcCfg) -> Result<(), OpteError> { +pub fn setup( + layer: &mut Layer, + cfg: &VpcCfg, + stats: &mut StatTree, +) -> Result<(), OpteError> { // ================================================================ // Outbound ARP Request for Gateway, from Guest // @@ -33,7 +38,7 @@ pub fn setup(layer: &mut Layer, cfg: &VpcCfg) -> Result<(), OpteError> { )]), Predicate::InnerEtherSrc(vec![EtherAddrMatch::Exact(cfg.guest_mac)]), ]); - layer.add_rule(Direction::Out, rule.finalize()); + layer.add_rule(Direction::Out, rule.finalize(), stats); Ok(()) } diff --git a/lib/oxide-vpc/src/engine/gateway/dhcp.rs b/lib/oxide-vpc/src/engine/gateway/dhcp.rs index d10698e6..e008cbc0 100644 --- a/lib/oxide-vpc/src/engine/gateway/dhcp.rs +++ b/lib/oxide-vpc/src/engine/gateway/dhcp.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-// Copyright 2024 Oxide Computer Company +// Copyright 2025 Oxide Computer Company //! The DHCP implementation of the Virtual Gateway. @@ -21,12 +21,14 @@ use opte::engine::ip::v4::Ipv4Cidr; use opte::engine::layer::Layer; use opte::engine::rule::Action; use opte::engine::rule::Rule; +use opte::engine::stat::StatTree; pub fn setup( layer: &mut Layer, cfg: &VpcCfg, ip_cfg: &Ipv4Cfg, dhcp_cfg: DhcpCfg, + stats: &mut StatTree, ) -> Result<(), OpteError> { // All guest interfaces live on a `/32`-network in the Oxide VPC; // restricting the L2 domain to two nodes: the guest NIC and the @@ -91,9 +93,9 @@ pub fn setup( })); let discover_rule = Rule::new(1, offer); - layer.add_rule(Direction::Out, discover_rule.finalize()); + layer.add_rule(Direction::Out, discover_rule.finalize(), stats); let request_rule = Rule::new(1, ack); - layer.add_rule(Direction::Out, request_rule.finalize()); + layer.add_rule(Direction::Out, request_rule.finalize(), stats); Ok(()) } diff --git a/lib/oxide-vpc/src/engine/gateway/dhcpv6.rs b/lib/oxide-vpc/src/engine/gateway/dhcpv6.rs index 00bbec2a..071cd3b3 100644 --- a/lib/oxide-vpc/src/engine/gateway/dhcpv6.rs +++ b/lib/oxide-vpc/src/engine/gateway/dhcpv6.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2025 Oxide Computer Company //! The DHCPv6 implementation of the Virtual Gateway. 
@@ -17,11 +17,13 @@ use opte::engine::dhcpv6::LeasedAddress; use opte::engine::layer::Layer; use opte::engine::rule::Action; use opte::engine::rule::Rule; +use opte::engine::stat::StatTree; pub fn setup( layer: &mut Layer, cfg: &VpcCfg, dhcp_cfg: DhcpCfg, + stats: &mut StatTree, ) -> Result<(), OpteError> { let ip_cfg = match cfg.ipv6_cfg() { None => return Ok(()), @@ -44,6 +46,6 @@ pub fn setup( let server = Action::Hairpin(Arc::new(action)); let rule = Rule::new(1, server); - layer.add_rule(Direction::Out, rule.finalize()); + layer.add_rule(Direction::Out, rule.finalize(), stats); Ok(()) } diff --git a/lib/oxide-vpc/src/engine/gateway/icmp.rs b/lib/oxide-vpc/src/engine/gateway/icmp.rs index c4c48550..c08d4067 100644 --- a/lib/oxide-vpc/src/engine/gateway/icmp.rs +++ b/lib/oxide-vpc/src/engine/gateway/icmp.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2025 Oxide Computer Company //! The ICMP implementation of the Virtual Gateway. @@ -15,11 +15,13 @@ use opte::engine::icmp::v4::IcmpEchoReply; use opte::engine::layer::Layer; use opte::engine::rule::Action; use opte::engine::rule::Rule; +use opte::engine::stat::StatTree; pub fn setup( layer: &mut Layer, cfg: &VpcCfg, ip_cfg: &Ipv4Cfg, + stats: &mut StatTree, ) -> Result<(), OpteError> { // ================================================================ // ICMPv4 Echo Reply @@ -33,6 +35,6 @@ pub fn setup( echo_dst_ip: ip_cfg.gateway_ip, })); let rule = Rule::new(1, reply); - layer.add_rule(Direction::Out, rule.finalize()); + layer.add_rule(Direction::Out, rule.finalize(), stats); Ok(()) } diff --git a/lib/oxide-vpc/src/engine/gateway/icmpv6.rs b/lib/oxide-vpc/src/engine/gateway/icmpv6.rs index 2821325b..ee17ea45 100644 --- a/lib/oxide-vpc/src/engine/gateway/icmpv6.rs +++ b/lib/oxide-vpc/src/engine/gateway/icmpv6.rs @@ -2,7 +2,7 @@ // License, v. 2.0. 
If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2025 Oxide Computer Company //! The ICMPv6 implementation of the Virtual Gateway. @@ -19,6 +19,7 @@ use opte::engine::layer::Layer; use opte::engine::predicate::DataPredicate; use opte::engine::rule::Action; use opte::engine::rule::Rule; +use opte::engine::stat::StatTree; use smoltcp::wire::Icmpv6Message; // Add support for ICMPv6: @@ -38,6 +39,7 @@ pub fn setup( layer: &mut Layer, cfg: &VpcCfg, ip_cfg: &Ipv6Cfg, + stats: &mut StatTree, ) -> Result<(), OpteError> { let dst_ip = Ipv6Addr::from_eui64(&cfg.gateway_mac); let hairpins = [ @@ -87,7 +89,7 @@ pub fn setup( hairpins.into_iter().enumerate().for_each(|(i, action)| { let priority = u16::try_from(i + 1).unwrap(); let rule = Rule::new(priority, action); - layer.add_rule(Direction::Out, rule.finalize()); + layer.add_rule(Direction::Out, rule.finalize(), stats); }); // Filter any uncaught in/out-bound NDP traffic. @@ -99,11 +101,11 @@ pub fn setup( let mut ndp_filter = Rule::new(next_out_prio, Action::Deny); ndp_filter.add_data_predicate(pred); - layer.add_rule(Direction::Out, ndp_filter.finalize()); + layer.add_rule(Direction::Out, ndp_filter.finalize(), stats); let mut ndp_filter = Rule::new(1, Action::Deny); ndp_filter.add_data_predicate(in_pred); - layer.add_rule(Direction::In, ndp_filter.finalize()); + layer.add_rule(Direction::In, ndp_filter.finalize(), stats); Ok(()) } diff --git a/lib/oxide-vpc/src/engine/gateway/mod.rs b/lib/oxide-vpc/src/engine/gateway/mod.rs index b8d6a580..21076948 100644 --- a/lib/oxide-vpc/src/engine/gateway/mod.rs +++ b/lib/oxide-vpc/src/engine/gateway/mod.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2024 Oxide Computer Company +// Copyright 2025 Oxide Computer Company //! 
The Oxide VPC Virtual Gateway. //! @@ -89,7 +89,7 @@ pub use transit::*; pub const NAME: &str = "gateway"; pub fn setup( - pb: &PortBuilder, + pb: &mut PortBuilder, cfg: &VpcCfg, vpc_mappings: Arc, ft_limit: core::num::NonZeroU32, @@ -104,15 +104,16 @@ pub fn setup( // Since we are acting as a gateway we also rewrite the source MAC address // for inbound traffic to be that of the gateway. let actions = LayerActions { - actions: vec![], default_in: DefaultAction::Deny, default_out: DefaultAction::Deny, + ..Default::default() }; - let mut layer = Layer::new(NAME, pb.name(), actions, ft_limit); + let mut layer = Layer::new(NAME, pb, actions, ft_limit); if let Some(ipv4_cfg) = cfg.ipv4_cfg() { setup_ipv4( + pb, &mut layer, cfg, ipv4_cfg, @@ -122,7 +123,14 @@ pub fn setup( } if let Some(ipv6_cfg) = cfg.ipv6_cfg() { - setup_ipv6(&mut layer, cfg, ipv6_cfg, vpc_mappings, dhcp_cfg.clone())?; + setup_ipv6( + pb, + &mut layer, + cfg, + ipv6_cfg, + vpc_mappings, + dhcp_cfg.clone(), + )?; } pb.add_layer(layer, Pos::Before("firewall")) @@ -161,15 +169,17 @@ impl StaticAction for RewriteSrcMac { } fn setup_ipv4( + pb: &mut PortBuilder, layer: &mut Layer, cfg: &VpcCfg, ip_cfg: &Ipv4Cfg, vpc_mappings: Arc, dhcp_cfg: DhcpCfg, ) -> Result<(), OpteError> { - arp::setup(layer, cfg)?; - dhcp::setup(layer, cfg, ip_cfg, dhcp_cfg)?; - icmp::setup(layer, cfg, ip_cfg)?; + let stats = pb.stats_mut(); + arp::setup(layer, cfg, stats)?; + dhcp::setup(layer, cfg, ip_cfg, dhcp_cfg, stats)?; + icmp::setup(layer, cfg, ip_cfg, stats)?; let vpc_meta = Arc::new(VpcMeta::new(vpc_mappings)); @@ -180,7 +190,7 @@ fn setup_ipv4( nospoof_out.add_predicate(Predicate::InnerEtherSrc(vec![ EtherAddrMatch::Exact(cfg.guest_mac), ])); - layer.add_rule(Direction::Out, nospoof_out.finalize()); + layer.add_rule(Direction::Out, nospoof_out.finalize(), stats); let mut unicast_in = Rule::new( 1000, @@ -194,20 +204,22 @@ fn setup_ipv4( unicast_in.add_predicate(Predicate::InnerEtherDst(vec![ 
EtherAddrMatch::Exact(cfg.guest_mac), ])); - layer.add_rule(Direction::In, unicast_in.finalize()); + layer.add_rule(Direction::In, unicast_in.finalize(), stats); Ok(()) } fn setup_ipv6( + pb: &mut PortBuilder, layer: &mut Layer, cfg: &VpcCfg, ip_cfg: &Ipv6Cfg, vpc_mappings: Arc, dhcp_cfg: DhcpCfg, ) -> Result<(), OpteError> { - icmpv6::setup(layer, cfg, ip_cfg)?; - dhcpv6::setup(layer, cfg, dhcp_cfg)?; + let stats = pb.stats_mut(); + icmpv6::setup(layer, cfg, ip_cfg, stats)?; + dhcpv6::setup(layer, cfg, dhcp_cfg, stats)?; let vpc_meta = Arc::new(VpcMeta::new(vpc_mappings)); let mut nospoof_out = Rule::new(1000, Action::Meta(vpc_meta)); nospoof_out.add_predicate(Predicate::InnerSrcIp6(vec![ @@ -216,7 +228,7 @@ fn setup_ipv6( nospoof_out.add_predicate(Predicate::InnerEtherSrc(vec![ EtherAddrMatch::Exact(cfg.guest_mac), ])); - layer.add_rule(Direction::Out, nospoof_out.finalize()); + layer.add_rule(Direction::Out, nospoof_out.finalize(), stats); let mut unicast_in = Rule::new( 1000, @@ -230,7 +242,7 @@ fn setup_ipv6( unicast_in.add_predicate(Predicate::InnerEtherDst(vec![ EtherAddrMatch::Exact(cfg.guest_mac), ])); - layer.add_rule(Direction::In, unicast_in.finalize()); + layer.add_rule(Direction::In, unicast_in.finalize(), stats); Ok(()) } diff --git a/lib/oxide-vpc/src/engine/nat.rs b/lib/oxide-vpc/src/engine/nat.rs index 2251a246..698ae14d 100644 --- a/lib/oxide-vpc/src/engine/nat.rs +++ b/lib/oxide-vpc/src/engine/nat.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2025 Oxide Computer Company use super::VpcNetwork; use super::router::ROUTER_LAYER_NAME; @@ -101,14 +101,14 @@ pub fn setup( // but no valid replacement source IP must be dropped, otherwise it will // be forwarded to boundary services. 
let actions = LayerActions { - actions: vec![], default_in: DefaultAction::Allow, default_out: DefaultAction::Allow, + ..Default::default() }; - let mut layer = Layer::new(NAT_LAYER_NAME, pb.name(), actions, ft_limit); + let mut layer = Layer::new(NAT_LAYER_NAME, pb, actions, ft_limit); let (in_rules, out_rules) = create_nat_rules(cfg, None)?; - layer.set_rules(in_rules, out_rules); + layer.set_rules(in_rules, out_rules, pb.stats_mut()); pb.add_layer(layer, Pos::After(ROUTER_LAYER_NAME)) } diff --git a/lib/oxide-vpc/src/engine/overlay.rs b/lib/oxide-vpc/src/engine/overlay.rs index 8a7cc1ec..600369ea 100644 --- a/lib/oxide-vpc/src/engine/overlay.rs +++ b/lib/oxide-vpc/src/engine/overlay.rs @@ -71,7 +71,7 @@ use poptrie::Poptrie; pub const OVERLAY_LAYER_NAME: &str = "overlay"; pub fn setup( - pb: &PortBuilder, + pb: &mut PortBuilder, cfg: &VpcCfg, v2p: Arc, v2b: Arc, @@ -92,14 +92,14 @@ pub fn setup( actions: vec![encap, decap], default_in: DefaultAction::Deny, default_out: DefaultAction::Deny, + ..Default::default() }; - let mut layer = - Layer::new(OVERLAY_LAYER_NAME, pb.name(), actions, ft_limit); + let mut layer = Layer::new(OVERLAY_LAYER_NAME, pb, actions, ft_limit); let encap_rule = Rule::match_any(1, layer.action(0).unwrap()); - layer.add_rule(Direction::Out, encap_rule); + layer.add_rule(Direction::Out, encap_rule, pb.stats_mut()); let decap_rule = Rule::match_any(1, layer.action(1).unwrap()); - layer.add_rule(Direction::In, decap_rule); + layer.add_rule(Direction::In, decap_rule, pb.stats_mut()); // NOTE The First/Last positions cannot fail; perhaps I should // improve the API to avoid the unwrap(). pb.add_layer(layer, Pos::Last) diff --git a/lib/oxide-vpc/src/engine/router.rs b/lib/oxide-vpc/src/engine/router.rs index c05f7882..e7c12f0f 100644 --- a/lib/oxide-vpc/src/engine/router.rs +++ b/lib/oxide-vpc/src/engine/router.rs @@ -2,7 +2,7 @@ // License, v. 2.0. 
If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2024 Oxide Computer Company +// Copyright 2025 Oxide Computer Company //! The Oxide Network VPC Router. //! @@ -247,7 +247,7 @@ fn compute_rule_priority(cidr: &IpCidr, class: RouterClass) -> u16 { } pub fn setup( - pb: &PortBuilder, + pb: &mut PortBuilder, _cfg: &VpcCfg, ft_limit: core::num::NonZeroU32, ) -> Result<(), OpteError> { @@ -257,12 +257,12 @@ pub fn setup( // Outbound: If there is no matching route, then the packet should // make it no further. let actions = LayerActions { - actions: vec![], default_in: DefaultAction::Allow, default_out: DefaultAction::Deny, + ..Default::default() }; - let layer = Layer::new(ROUTER_LAYER_NAME, pb.name(), actions, ft_limit); + let layer = Layer::new(ROUTER_LAYER_NAME, pb, actions, ft_limit); pb.add_layer(layer, Pos::After(fw::FW_LAYER_NAME)) } From c73611fd0223eb20f0e6dfd325e2e8747aae2bf0 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 29 May 2025 15:11:21 +0100 Subject: [PATCH 04/37] Possibly actual LFTs, expiry. 
--- crates/opte-api/src/stat.rs | 1 + lib/opte-test-utils/src/port_state.rs | 5 + lib/opte/src/engine/layer.rs | 156 ++++++++++++++++------ lib/opte/src/engine/port/mod.rs | 31 ++++- lib/opte/src/engine/stat.rs | 160 +++++++++++++++++++++-- lib/opte/src/lib.rs | 6 + lib/oxide-vpc/tests/integration_tests.rs | 2 + xde/src/xde.rs | 6 +- 8 files changed, 309 insertions(+), 58 deletions(-) diff --git a/crates/opte-api/src/stat.rs b/crates/opte-api/src/stat.rs index f060ee10..846bf88f 100644 --- a/crates/opte-api/src/stat.rs +++ b/crates/opte-api/src/stat.rs @@ -22,6 +22,7 @@ pub struct FlowStat { #[derive(Deserialize, Serialize, Debug, Clone, Copy)] pub struct PacketCounter { + pub created_at: u64, pub pkts_in: u64, pub bytes_in: u64, pub pkts_out: u64, diff --git a/lib/opte-test-utils/src/port_state.rs b/lib/opte-test-utils/src/port_state.rs index 3a233f58..c39bafb4 100644 --- a/lib/opte-test-utils/src/port_state.rs +++ b/lib/opte-test-utils/src/port_state.rs @@ -86,6 +86,11 @@ pub fn print_port( write_hr(&mut out)?; writeln!(&mut out, "{:#?}", port.stats_snap())?; + // ================================================================ + // Print the Better Stats + // ================================================================ + writeln!(&mut out, "{}", port.dump_flow_stats().unwrap())?; + write_hrb(&mut out)?; writeln!(&mut out)?; diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index 2671dc17..f2ba7ab5 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -32,6 +32,7 @@ use super::rule::ht_probe; use super::stat::StatTree; use super::stat::TableStat; use crate::ExecCtx; +use crate::ExecCtx2; use crate::LogLevel; use crate::api::DumpLayerResp; use crate::d_error::DError; @@ -160,10 +161,31 @@ pub enum LftError { MaxCapacity, } +#[derive(Clone, Debug)] +struct LftInEntry { + action_desc: ActionDescEntry, + stat: Arc, +} + +impl Display for LftInEntry { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + 
write!(f, "{}", self.action_desc) + } +} + +impl Dump for LftInEntry { + type DumpVal = ActionDescEntryDump; + + fn dump(&self, hits: u64) -> Self::DumpVal { + ActionDescEntryDump { hits, summary: self.to_string() } + } +} + #[derive(Clone, Debug)] struct LftOutEntry { in_flow_pair: InnerFlowId, action_desc: ActionDescEntry, + stat: Arc, } impl LftOutEntry { @@ -189,7 +211,7 @@ impl Dump for LftOutEntry { struct LayerFlowTable { limit: NonZeroU32, count: u32, - ft_in: FlowTable, + ft_in: FlowTable, ft_out: FlowTable, } @@ -205,11 +227,15 @@ impl LayerFlowTable { action_desc: ActionDescEntry, in_flow: InnerFlowId, out_flow: InnerFlowId, + stat: Arc, ) { // We add unchekced because the limit is now enforced by // LayerFlowTable, not the individual flow tables. - self.ft_in.add_unchecked(in_flow, action_desc.clone()); - let out_entry = LftOutEntry { in_flow_pair: in_flow, action_desc }; + let in_entry = + LftInEntry { action_desc: action_desc.clone(), stat: stat.clone() }; + self.ft_in.add_unchecked(in_flow, in_entry); + let out_entry = + LftOutEntry { in_flow_pair: in_flow, action_desc, stat }; self.ft_out.add_unchecked(out_flow, out_entry); self.count += 1; } @@ -250,10 +276,12 @@ impl LayerFlowTable { match self.ft_in.get(flow) { Some(entry) => { entry.hit(); + let action = entry.state().action_desc.clone(); + let stat = entry.state().stat.clone(); if entry.is_dirty() { - EntryState::Dirty(entry.state().clone()) + EntryState::Dirty(action, stat) } else { - EntryState::Clean(entry.state().clone()) + EntryState::Clean(action, stat) } } @@ -266,10 +294,11 @@ impl LayerFlowTable { Some(entry) => { entry.hit(); let action = entry.state().action_desc.clone(); + let stat = entry.state().stat.clone(); if entry.is_dirty() { - EntryState::Dirty(action) + EntryState::Dirty(action, stat) } else { - EntryState::Clean(action) + EntryState::Clean(action, stat) } } @@ -280,7 +309,7 @@ impl LayerFlowTable { fn remove_in( &mut self, flow: &InnerFlowId, - ) -> Option>> { + ) -> 
Option>> { self.ft_in.remove(flow) } @@ -340,14 +369,14 @@ impl LayerFlowTable { } /// The result of a flowtable lookup. -pub enum EntryState { +enum EntryState { /// No flow entry was found matching a given flowid. None, /// An existing flow table entry was found. - Clean(ActionDescEntry), + Clean(ActionDescEntry, Arc), /// An existing flow table entry was found, but rule processing must be rerun /// to use the original action or invalidate the underlying entry. - Dirty(ActionDescEntry), + Dirty(ActionDescEntry, Arc), } /// The default action of a layer. @@ -815,7 +844,7 @@ impl Layer { pub(crate) fn process( &mut self, - ectx: &ExecCtx, + ectx: &mut ExecCtx2, dir: Direction, pkt: &mut Packet, xforms: &mut Transforms, @@ -834,7 +863,7 @@ impl Layer { fn process_in( &mut self, - ectx: &ExecCtx, + ectx: &mut ExecCtx2, pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, @@ -846,23 +875,27 @@ impl Layer { // Do we have a FlowTable entry? If so, use it. let flow = *pkt.flow(); - let action = match self.ft.get_in(&flow) { - EntryState::Dirty(ActionDescEntry::Desc(action)) + let (action, stat) = match self.ft.get_in(&flow) { + EntryState::Dirty(ActionDescEntry::Desc(action), stat) if action.is_valid() => { self.ft.mark_clean(Direction::In, &flow); - Some(ActionDescEntry::Desc(action)) + (Some(ActionDescEntry::Desc(action)), Some(stat)) } - EntryState::Dirty(_) => { + EntryState::Dirty(_, _) => { // NoOps are included in this case as we can't ask the actor whether // it remains valid: the simplest method to do so is to rerun lookup. 
self.ft.remove_in(&flow); - None + (None, None) } - EntryState::Clean(action) => Some(action), - EntryState::None => None, + EntryState::Clean(action, stat) => (Some(action), Some(stat)), + EntryState::None => (None, None), }; + if let Some(stat) = stat { + pkt.meta_mut().stats.push(stat); + } + match action { Some(ActionDescEntry::NoOp) => { self.stats.vals.in_lft_hit += 1; @@ -904,7 +937,7 @@ impl Layer { fn process_in_rules( &mut self, - ectx: &ExecCtx, + ectx: &mut ExecCtx2, pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, @@ -924,6 +957,8 @@ impl Layer { }; // No LFT to account for. + // TODO: figure out how to have actions push on some IDs + // that then belong to the LFT. let mut stat = Some(stat); if !matches!(action, Action::StatefulAllow | Action::Stateful(_)) { pkt.meta_mut().stats.push(stat.take().unwrap()); @@ -941,7 +976,9 @@ impl Layer { }); } - // TODO: how on earth are we plumbing StatTree into here?? + let stat = + ectx.stats.new_intermediate(vec![stat.take().unwrap()]); + pkt.meta_mut().stats.push(stat.clone()); // The outbound flow ID mirrors the inbound. Remember, // the "top" of layer represents how the client sees @@ -949,7 +986,7 @@ impl Layer { // represents how the network sees the traffic. 
let flow_out = pkt.flow().mirror(); let desc = ActionDescEntry::NoOp; - self.ft.add_pair(desc, *pkt.flow(), flow_out); + self.ft.add_pair(desc, *pkt.flow(), flow_out, stat); self.stats.vals.flows += 1; Ok(LayerResult::Allow) } @@ -993,7 +1030,12 @@ impl Layer { }, Err(e) => { - self.record_gen_ht_failure(ectx, In, pkt.flow(), &e); + self.record_gen_ht_failure( + ectx.user_ctx, + In, + pkt.flow(), + &e, + ); return Err(LayerError::GenHdrTransform { layer: self.name, err: e, @@ -1068,7 +1110,12 @@ impl Layer { }, Err(e) => { - self.record_gen_desc_failure(ectx, In, pkt.flow(), &e); + self.record_gen_desc_failure( + ectx.user_ctx, + In, + pkt.flow(), + &e, + ); return Err(LayerError::GenDesc(e)); } }; @@ -1092,6 +1139,10 @@ impl Layer { } } + let stat = + ectx.stats.new_intermediate(vec![stat.take().unwrap()]); + pkt.meta_mut().stats.push(stat.clone()); + // The outbound flow ID must be calculated _after_ the // header transformation. Remember, the "top" // (outbound) of layer represents how the client sees @@ -1104,6 +1155,7 @@ impl Layer { ActionDescEntry::Desc(desc), flow_before, flow_out, + stat, ); self.stats.vals.flows += 1; Ok(LayerResult::Allow) @@ -1131,7 +1183,7 @@ impl Layer { fn process_out( &mut self, - ectx: &ExecCtx, + ectx: &mut ExecCtx2, pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, @@ -1143,23 +1195,27 @@ impl Layer { // Do we have a FlowTable entry? If so, use it. 
let flow = *pkt.flow(); - let action = match self.ft.get_out(&flow) { - EntryState::Dirty(ActionDescEntry::Desc(action)) + let (action, stat) = match self.ft.get_out(&flow) { + EntryState::Dirty(ActionDescEntry::Desc(action), stat) if action.is_valid() => { self.ft.mark_clean(Direction::Out, &flow); - Some(ActionDescEntry::Desc(action)) + (Some(ActionDescEntry::Desc(action)), Some(stat)) } - EntryState::Dirty(_) => { + EntryState::Dirty(_, _) => { // NoOps are included in this case as we can't ask the actor whether // it remains valid: the simplest method to do so is to rerun lookup. self.ft.remove_out(&flow); - None + (None, None) } - EntryState::Clean(action) => Some(action), - EntryState::None => None, + EntryState::Clean(action, stat) => (Some(action), Some(stat)), + EntryState::None => (None, None), }; + if let Some(stat) = stat { + pkt.meta_mut().stats.push(stat); + } + match action { Some(ActionDescEntry::NoOp) => { self.stats.vals.out_lft_hit += 1; @@ -1201,7 +1257,7 @@ impl Layer { fn process_out_rules( &mut self, - ectx: &ExecCtx, + ectx: &mut ExecCtx2, pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, @@ -1221,6 +1277,8 @@ impl Layer { }; // No LFT to account for. + // TODO: figure out how to have actions push on some IDs + // that then belong to the LFT. let mut stat = Some(stat); if !matches!(action, Action::StatefulAllow | Action::Stateful(_)) { pkt.meta_mut().stats.push(stat.take().unwrap()); @@ -1238,7 +1296,9 @@ impl Layer { }); } - // TODO: how on earth are we plumbing StatTree into here?? + let stat = + ectx.stats.new_intermediate(vec![stat.take().unwrap()]); + pkt.meta_mut().stats.push(stat.clone()); // The inbound flow ID must be calculated _after_ the // header transformation. Remember, the "top" @@ -1248,7 +1308,12 @@ impl Layer { // The final step is to mirror the IPs and ports to // reflect the traffic direction change. 
let flow_in = pkt.flow().mirror(); - self.ft.add_pair(ActionDescEntry::NoOp, flow_in, *pkt.flow()); + self.ft.add_pair( + ActionDescEntry::NoOp, + flow_in, + *pkt.flow(), + stat, + ); self.stats.vals.flows += 1; Ok(LayerResult::Allow) } @@ -1292,7 +1357,12 @@ impl Layer { }, Err(e) => { - self.record_gen_ht_failure(ectx, Out, pkt.flow(), &e); + self.record_gen_ht_failure( + ectx.user_ctx, + Out, + pkt.flow(), + &e, + ); return Err(LayerError::GenHdrTransform { layer: self.name, err: e, @@ -1352,7 +1422,9 @@ impl Layer { }); } - // TODO: how on earth are we plumbing StatTree into here?? + let stat = + ectx.stats.new_intermediate(vec![stat.take().unwrap()]); + pkt.meta_mut().stats.push(stat.clone()); let desc = match action.gen_desc(pkt.flow(), pkt, ameta) { Ok(aord) => match aord { @@ -1367,7 +1439,12 @@ impl Layer { }, Err(e) => { - self.record_gen_desc_failure(ectx, Out, pkt.flow(), &e); + self.record_gen_desc_failure( + ectx.user_ctx, + Out, + pkt.flow(), + &e, + ); return Err(LayerError::GenDesc(e)); } }; @@ -1404,6 +1481,7 @@ impl Layer { ActionDescEntry::Desc(desc), flow_in, flow_before, + stat, ); self.stats.vals.flows += 1; Ok(LayerResult::Allow) diff --git a/lib/opte/src/engine/port/mod.rs b/lib/opte/src/engine/port/mod.rs index a59c6bd2..7c2aeb1d 100644 --- a/lib/opte/src/engine/port/mod.rs +++ b/lib/opte/src/engine/port/mod.rs @@ -52,6 +52,7 @@ use super::tcp::TIME_WAIT_EXPIRE_TTL; use super::tcp_state::TcpFlowState; use super::tcp_state::TcpFlowStateError; use crate::ExecCtx; +use crate::ExecCtx2; use crate::api::DumpLayerResp; use crate::api::DumpTcpFlowsResp; use crate::api::DumpUftResp; @@ -608,7 +609,10 @@ impl fmt::Debug for UftEntry { .field("l4_hash", l4_hash) .field("epoch", epoch) .field("tcp_flow", tcp_flow) - .field("stats", &"TODO") + .field( + "stats", + &crate::api::FlowStat::::from(stat.as_ref()), + ) .finish() } } @@ -1020,6 +1024,18 @@ impl Port { Ok(DumpTcpFlowsResp { flows: data.tcp_flows.dump() }) } + #[cfg(any(feature = "std", 
test))] + /// XXX TEST METHOD + pub fn dump_flow_stats(&self) -> Result { + let data = self.data.read(); + check_state!( + data.state, + [PortState::Running, PortState::Paused, PortState::Restored] + )?; + + Ok(data.flow_stats.dump()) + } + /// Clear all entries from the Unified Flow Table (UFT). /// /// # States @@ -1131,6 +1147,8 @@ impl Port { // set TIME_WAIT_EXPIRE_TTL or another state-specific timer lower // than 60s, we'll need to specifically expire the matching UFTs. let _ = data.tcp_flows.expire_flows(now, |_| FLOW_ID_DEFAULT); + + data.flow_stats.expire(now); Ok(()) } @@ -2010,11 +2028,13 @@ impl Port { xforms: &mut Transforms, ameta: &mut ActionMeta, ) -> result::Result { + let mut ectx = + ExecCtx2 { user_ctx: &self.ectx, stats: &mut data.flow_stats }; + match dir { Direction::Out => { for layer in &mut data.layers { - let res = - layer.process(&self.ectx, dir, pkt, xforms, ameta); + let res = layer.process(&mut ectx, dir, pkt, xforms, ameta); match res { Ok(LayerResult::Allow) => (), @@ -2028,8 +2048,7 @@ impl Port { Direction::In => { for layer in data.layers.iter_mut().rev() { - let res = - layer.process(&self.ectx, dir, pkt, xforms, ameta); + let res = layer.process(&mut ectx, dir, pkt, xforms, ameta); match res { Ok(LayerResult::Allow) => (), @@ -2413,6 +2432,7 @@ impl Port { let ufid_out = pkt.flow().mirror(); let stat = data.flow_stats.new_flow(ufid_in, &ufid_out, In, stat_parents); + stat.hit(pkt_len); let mut hte = UftEntry { pair: KMutex::new(Some(ufid_out)), xforms: xforms.compile(flags), @@ -2693,6 +2713,7 @@ impl Port { Out, stat_parents, ); + stat.hit(pkt_len); let hte = UftEntry { pair: KMutex::new(None), diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index 3742be95..129b328a 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -10,6 +10,7 @@ use crate::api::InnerFlowId; use crate::ddi::sync::KRwLock; use crate::ddi::sync::KRwLockType; use crate::ddi::time::Moment; +use 
crate::engine::flow_table::Ttl; use alloc::collections::BTreeMap; use alloc::collections::BTreeSet; use alloc::collections::btree_map::Entry; @@ -25,8 +26,7 @@ use opte_api::PacketCounter as ApiPktCounter; use opte_api::TcpState; use uuid::Uuid; -// TODO EXPIRY -// TODO DELETION +// TODO DELETION ON FLOW CLOSE [and holding onto 'dead flows'] /// Opaque identifier for tracking unique stat objects. #[derive(Copy, Clone, Hash, PartialEq, PartialOrd, Eq, Ord, Debug)] @@ -161,6 +161,7 @@ impl TableStat { pub struct PacketCounter { pub id: StatId, + pub created_at: Moment, pub pkts_in: AtomicU64, pub bytes_in: AtomicU64, @@ -172,6 +173,8 @@ impl PacketCounter { fn from_next_id(id: &mut u64) -> PacketCounter { PacketCounter { id: StatId::new(id), + created_at: Moment::now(), + pkts_in: 0.into(), bytes_in: 0.into(), pkts_out: 0.into(), @@ -210,6 +213,7 @@ impl PacketCounter { impl From<&PacketCounter> for ApiPktCounter { fn from(val: &PacketCounter) -> Self { ApiPktCounter { + created_at: val.created_at.raw(), pkts_in: val.pkts_in.load(Ordering::Relaxed), bytes_in: val.bytes_in.load(Ordering::Relaxed), pkts_out: val.pkts_out.load(Ordering::Relaxed), @@ -393,27 +397,139 @@ impl StatTree { out } - #[cfg(test)] + pub fn expire(&mut self, now: Moment) { + const EXPIRY_WINDOW: Ttl = Ttl::new_seconds(10); + // Root removal and re-entry? Don't want any gaps. + const ROOT_EXPIRY_WINDOW: Ttl = Ttl::new_seconds(100); + + #[derive(Default, Eq, PartialEq)] + enum Hmm { + #[default] + NotSeen, + SeenKeep, + Seen(InnerFlowId), + } + + #[derive(Default)] + struct Aa { + lhs: Hmm, + rhs: Hmm, + } + + // Flows -- need to account for shared component between arc'd things. 
+ let mut possibly_expired: BTreeMap = BTreeMap::new(); + for (k, v) in &self.flows { + let t_hit = + Moment::from_raw_nanos(v.last_hit.load(Ordering::Relaxed)); + let can_remove = EXPIRY_WINDOW.is_expired(t_hit, now) + && Arc::strong_count(v) == 1; + let base_id = v.shared.stats.id; + let el = possibly_expired.entry(base_id).or_default(); + match (v.dir, can_remove) { + (Direction::In, false) => { + el.lhs = Hmm::SeenKeep; + } + (Direction::Out, false) => { + el.rhs = Hmm::SeenKeep; + } + (Direction::In, true) => { + el.lhs = Hmm::Seen(*k); + } + (Direction::Out, true) => { + el.rhs = Hmm::Seen(*k); + } + } + } + for v in possibly_expired.values() { + let cannot_remove = v.lhs == Hmm::SeenKeep + || v.rhs == Hmm::SeenKeep + || (v.lhs == Hmm::NotSeen && v.rhs == Hmm::NotSeen); + if cannot_remove { + continue; + } + + #[allow(clippy::mutable_key_type)] + let mut parents: BTreeSet = Default::default(); + let mut base_stats = None; + if let Hmm::Seen(id) = v.lhs { + if let Some(flow) = self.flows.remove(&id) { + let flow = Arc::into_inner(flow) + .expect("strong count 1 is enforced above"); + for p_id in flow.parents { + parents.insert(ById(p_id)); + } + base_stats = Some(flow.shared); + } + } + if let Hmm::Seen(id) = v.rhs { + if let Some(flow) = self.flows.remove(&id) { + let flow = Arc::into_inner(flow) + .expect("strong count 1 is enforced above"); + for p_id in flow.parents { + parents.insert(ById(p_id)); + } + base_stats = Some(flow.shared); + } + } + + // At long last, combine! + let base_stats = + base_stats.expect("should not have no parent here!!"); + for parent in parents { + base_stats.stats.combine(&parent.0.stats.packets); + } + } + + // Intermediates. + self.intermediate.retain(|v| { + // Time is... not relevant here. The LFTs are GONE. 
+ if Arc::strong_count(v) == 1 { + for p in &v.parents { + v.stats.combine(&p.stats); + } + false + } else { + true + } + }); + + // Roots may need to be held onto for some time in case rules with the + // same ID come and go in adjacent control plane operations... + self.roots.retain(|_, v| { + let t_hit = + Moment::from_raw_nanos(v.last_hit.load(Ordering::Relaxed)); + Arc::strong_count(v) > 1 + || !ROOT_EXPIRY_WINDOW.is_expired(t_hit, now) + }); + } + + #[cfg(any(feature = "std", test))] pub fn dump(&self) -> String { let mut out = String::new(); - out.push_str("Roots\n"); + out.push_str("--Roots--\n"); for (id, root) in &self.roots { let d = ApiFullCounter::from(&root.stats); out.push_str(&format!("\t{:?}/{id} -> {d:?}\n", root.stats.id())); } - out.push_str("Ints\n"); + out.push_str("----\n\n"); + out.push_str("--Ints--\n"); for root in &self.intermediate { let d = ApiFullCounter::from(&root.stats); out.push_str(&format!("\t{:?} -> {d:?}\n", root.stats.id())); + let parents: Vec> = + root.parents.iter().map(|v| v.id).collect(); + out.push_str(&format!("\t\tparents {parents:?}\n\n")); } - out.push_str("Flows\n"); + out.push_str("----\n\n"); + out.push_str("--Flows--\n"); for (id, stat) in &self.flows { - let d: ApiFlowStat = stat.as_ref().into(); - out.push_str(&format!( - "\t{}/{}/{:?} -> {d:?}\n", - id, stat.dir, stat.shared.stats.id - )); + // let d: ApiFlowStat = stat.as_ref().into(); + let d: ApiPktCounter = (&stat.as_ref().shared.stats).into(); + out.push_str(&format!("\t{id}/{} ->\n", stat.dir)); + out.push_str(&format!("\t\t{:?} {d:?}\n", stat.shared.stats.id)); + out.push_str(&format!("\t\tparents {:?}\n\n", stat.bases)); } + out.push_str("----\n"); out } } @@ -498,3 +614,25 @@ impl Default for FlowStatBuilder { Self::new() } } + +struct ById(Arc); + +impl PartialOrd for ById { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for ById { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + 
self.0.stats.id().cmp(&other.0.stats.id()) + } +} + +impl PartialEq for ById { + fn eq(&self, other: &Self) -> bool { + self.0.stats.id() == other.0.stats.id() + } +} + +impl Eq for ById {} diff --git a/lib/opte/src/lib.rs b/lib/opte/src/lib.rs index 1adb074c..2838a5e9 100644 --- a/lib/opte/src/lib.rs +++ b/lib/opte/src/lib.rs @@ -31,6 +31,7 @@ extern crate self as opte; use alloc::boxed::Box; use core::fmt; use core::fmt::Display; +use engine::stat::StatTree; pub use ingot; @@ -256,3 +257,8 @@ impl LogProvider for KernelLog { pub struct ExecCtx { pub log: Box, } + +pub(crate) struct ExecCtx2<'a> { + pub user_ctx: &'a ExecCtx, + pub stats: &'a mut StatTree, +} diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 08c173a5..fed5c883 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -3134,6 +3134,8 @@ fn uft_lft_invalidation_out() { "incr:stats.port.out_uft_miss", ] ); + + print_port(&g1.port, &g1.vpc_map); } // Verify that changing rules causes invalidation of UFT and LFT diff --git a/xde/src/xde.rs b/xde/src/xde.rs index db1cfabe..fc678372 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -2030,10 +2030,10 @@ fn new_port( // XXX some layers have no need for LFT, perhaps have two types // of Layer: one with, one without? 
- gateway::setup(&pb, &cfg, vpc_map, FT_LIMIT_ONE, dhcp_cfg)?; - router::setup(&pb, &cfg, FT_LIMIT_ONE)?; + gateway::setup(&mut pb, &cfg, vpc_map, FT_LIMIT_ONE, dhcp_cfg)?; + router::setup(&mut pb, &cfg, FT_LIMIT_ONE)?; nat::setup(&mut pb, &cfg, nat_ft_limit)?; - overlay::setup(&pb, &cfg, v2p, v2b, FT_LIMIT_ONE)?; + overlay::setup(&mut pb, &cfg, v2p, v2b, FT_LIMIT_ONE)?; // Set the overall unified flow and TCP flow table limits based on the total // configuration above, by taking the maximum of size of the individual From 923f9ee8a5f2d652f4558fddaa45f141d94bf846 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 29 May 2025 15:12:45 +0100 Subject: [PATCH 05/37] Plumbed a demo method. --- bin/opteadm/src/bin/opteadm.rs | 14 ++++++++++++++ crates/opte-api/src/cmd.rs | 3 +++ lib/opte-ioctl/src/lib.rs | 11 +++++++++++ lib/opte/src/engine/port/mod.rs | 1 - lib/opte/src/engine/stat.rs | 8 ++++++-- lib/oxide-vpc/src/api.rs | 8 ++++++++ xde/src/xde.rs | 16 ++++++++++++++++ 7 files changed, 58 insertions(+), 3 deletions(-) diff --git a/bin/opteadm/src/bin/opteadm.rs b/bin/opteadm/src/bin/opteadm.rs index fd3e4f7d..6a3e4171 100644 --- a/bin/opteadm/src/bin/opteadm.rs +++ b/bin/opteadm/src/bin/opteadm.rs @@ -32,6 +32,7 @@ use oxide_vpc::api::ClearVirt2PhysReq; use oxide_vpc::api::DelRouterEntryReq; use oxide_vpc::api::DelRouterEntryResp; use oxide_vpc::api::DhcpCfg; +use oxide_vpc::api::DumpFlowStatsResp; use oxide_vpc::api::ExternalIpCfg; use oxide_vpc::api::Filters as FirewallFilters; use oxide_vpc::api::FirewallAction; @@ -276,6 +277,13 @@ enum Command { #[arg(long = "dir")] direction: Option, }, + + /// XXX TEMP + DumpFlowStats { + /// The OPTE port to read... 
+ #[arg(short)] + port: String, + } } #[derive(Debug, Parser)] @@ -859,6 +867,12 @@ fn main() -> anyhow::Result<()> { })?; } } + + // XXX TEMP + Command::DumpFlowStats { port } => { + let DumpFlowStatsResp{ data } = hdl.dump_flowstats(&port)?; + println!("{data}"); + } } Ok(()) diff --git a/crates/opte-api/src/cmd.rs b/crates/opte-api/src/cmd.rs index 6ec2848d..1d788f8d 100644 --- a/crates/opte-api/src/cmd.rs +++ b/crates/opte-api/src/cmd.rs @@ -49,6 +49,9 @@ pub enum OpteCmd { SetExternalIps = 80, // set xde external IPs for a port AllowCidr = 90, // allow ip block through gateway tx/rx RemoveCidr = 91, // deny ip block through gateway tx/rx + + // TEMP + DumpFlowStats = 34, } impl TryFrom for OpteCmd { diff --git a/lib/opte-ioctl/src/lib.rs b/lib/opte-ioctl/src/lib.rs index 553da16d..17cef73a 100644 --- a/lib/opte-ioctl/src/lib.rs +++ b/lib/opte-ioctl/src/lib.rs @@ -34,6 +34,7 @@ use oxide_vpc::api::DelRouterEntryReq; use oxide_vpc::api::DelRouterEntryResp; use oxide_vpc::api::DeleteXdeReq; use oxide_vpc::api::DhcpCfg; +use oxide_vpc::api::DumpFlowStatsResp; use oxide_vpc::api::DumpVirt2BoundaryResp; use oxide_vpc::api::DumpVirt2PhysResp; use oxide_vpc::api::IpCidr; @@ -368,6 +369,16 @@ impl OpteHdl { Some(&DumpUftReq { port_name: port_name.to_string() }), ) } + + /// TEMP METHOD + pub fn dump_flowstats(&self, port_name: &str) -> Result { + let cmd = OpteCmd::DumpFlowStats; + run_cmd_ioctl( + self.device.as_raw_fd(), + cmd, + Some(&DumpUftReq { port_name: port_name.to_string() }), + ) + } } pub fn run_cmd_ioctl( diff --git a/lib/opte/src/engine/port/mod.rs b/lib/opte/src/engine/port/mod.rs index 7c2aeb1d..739943af 100644 --- a/lib/opte/src/engine/port/mod.rs +++ b/lib/opte/src/engine/port/mod.rs @@ -1024,7 +1024,6 @@ impl Port { Ok(DumpTcpFlowsResp { flows: data.tcp_flows.dump() }) } - #[cfg(any(feature = "std", test))] /// XXX TEST METHOD pub fn dump_flow_stats(&self) -> Result { let data = self.data.read(); diff --git a/lib/opte/src/engine/stat.rs 
b/lib/opte/src/engine/stat.rs index 129b328a..d03f453c 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -14,6 +14,7 @@ use crate::engine::flow_table::Ttl; use alloc::collections::BTreeMap; use alloc::collections::BTreeSet; use alloc::collections::btree_map::Entry; +use alloc::string::String; use alloc::sync::Arc; use alloc::sync::Weak; use alloc::vec::Vec; @@ -503,7 +504,7 @@ impl StatTree { }); } - #[cfg(any(feature = "std", test))] + // TEMP pub fn dump(&self) -> String { let mut out = String::new(); out.push_str("--Roots--\n"); @@ -525,9 +526,12 @@ impl StatTree { for (id, stat) in &self.flows { // let d: ApiFlowStat = stat.as_ref().into(); let d: ApiPktCounter = (&stat.as_ref().shared.stats).into(); + let parents: Vec<_> = + stat.parents.iter().map(|v| v.stats.id()).collect(); out.push_str(&format!("\t{id}/{} ->\n", stat.dir)); out.push_str(&format!("\t\t{:?} {d:?}\n", stat.shared.stats.id)); - out.push_str(&format!("\t\tparents {:?}\n\n", stat.bases)); + out.push_str(&format!("\t\tparents {:?}\n", parents)); + out.push_str(&format!("\t\tbases {:?}\n\n", stat.bases)); } out.push_str("----\n"); out diff --git a/lib/oxide-vpc/src/api.rs b/lib/oxide-vpc/src/api.rs index 78ac6634..68034c29 100644 --- a/lib/oxide-vpc/src/api.rs +++ b/lib/oxide-vpc/src/api.rs @@ -641,6 +641,14 @@ pub struct FirewallRule { pub priority: u16, } +// TEMP +#[derive(Debug, Deserialize, Serialize)] +pub struct DumpFlowStatsResp { + pub data: String, +} + +impl CmdOk for DumpFlowStatsResp {} + impl FromStr for FirewallRule { type Err = String; diff --git a/xde/src/xde.rs b/xde/src/xde.rs index fc678372..fc6f5f82 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -684,6 +684,22 @@ unsafe extern "C" fn xde_ioc_opte_cmd(karg: *mut c_void, mode: c_int) -> c_int { let resp = remove_cidr_hdlr(&mut env); hdlr_resp(&mut env, resp) } + + // TEMP + OpteCmd::DumpFlowStats => { + let resp = flow_stats_hdlr(&mut env); + hdlr_resp(&mut env, resp) + } + } +} + 
+#[unsafe(no_mangle)] +fn flow_stats_hdlr(env: &mut IoctlEnvelope) -> Result { + let req: oxide_vpc::api::DumpUftReq = env.copy_in_req()?; + let devs = xde_devs().read(); + match devs.get_by_name(&req.port_name) { + Some(dev) => dev.port.dump_flow_stats().map(|data| oxide_vpc::api::DumpFlowStatsResp {data}), + None => Err(OpteError::PortNotFound(req.port_name)), } } From 1336572b44604144772a747949ae9043d7b42427 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 2 May 2025 17:26:37 +0100 Subject: [PATCH 06/37] Why not name some rules? --- lib/oxide-vpc/src/{api.rs => api/mod.rs} | 4 ++- lib/oxide-vpc/src/api/stat.rs | 33 ++++++++++++++++++++++++ lib/oxide-vpc/src/engine/firewall.rs | 3 +++ lib/oxide-vpc/src/engine/gateway/mod.rs | 3 +++ lib/oxide-vpc/src/engine/nat.rs | 7 +++-- lib/oxide-vpc/src/engine/router.rs | 2 ++ 6 files changed, 49 insertions(+), 3 deletions(-) rename lib/oxide-vpc/src/{api.rs => api/mod.rs} (99%) create mode 100644 lib/oxide-vpc/src/api/stat.rs diff --git a/lib/oxide-vpc/src/api.rs b/lib/oxide-vpc/src/api/mod.rs similarity index 99% rename from lib/oxide-vpc/src/api.rs rename to lib/oxide-vpc/src/api/mod.rs index 68034c29..d8488295 100644 --- a/lib/oxide-vpc/src/api.rs +++ b/lib/oxide-vpc/src/api/mod.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2024 Oxide Computer Company +// Copyright 2025 Oxide Computer Company use alloc::collections::BTreeMap; use alloc::collections::BTreeSet; @@ -19,6 +19,8 @@ use serde::Deserialize; use serde::Serialize; use uuid::Uuid; +pub mod stat; + /// This is the MAC address that OPTE uses to act as the virtual gateway. 
pub const GW_MAC_ADDR: MacAddr = MacAddr::from_const([0xA8, 0x40, 0x25, 0xFF, 0x77, 0x77]); diff --git a/lib/oxide-vpc/src/api/stat.rs b/lib/oxide-vpc/src/api/stat.rs new file mode 100644 index 00000000..20cfcb00 --- /dev/null +++ b/lib/oxide-vpc/src/api/stat.rs @@ -0,0 +1,33 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2025 Oxide Computer Company + +//! Stat IDs for the Oxide VPC API. + +use uuid::Uuid; + +pub static FW_DEFAULT_IN: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0000, &0u64.to_be_bytes()); +pub static FW_DEFAULT_OUT: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0000, &1u64.to_be_bytes()); + +pub static GATEWAY_NOSPOOF_IN: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0001, &0u64.to_be_bytes()); +pub static GATEWAY_NOSPOOF_OUT: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0001, &1u64.to_be_bytes()); + +pub static ROUTER_NOROUTE: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0002, &0u64.to_be_bytes()); + +pub static NAT_SNAT_V4: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0003, &0u64.to_be_bytes()); +pub static NAT_SNAT_V6: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0003, &1u64.to_be_bytes()); +pub static NAT_VALID_IGW_V4: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0003, &2u64.to_be_bytes()); +pub static NAT_VALID_IGW_V6: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0003, &3u64.to_be_bytes()); +pub static NAT_NONE: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0003, &255u64.to_be_bytes()); \ No newline at end of file diff --git a/lib/oxide-vpc/src/engine/firewall.rs b/lib/oxide-vpc/src/engine/firewall.rs index 998163e9..73f670d3 100644 --- a/lib/oxide-vpc/src/engine/firewall.rs +++ b/lib/oxide-vpc/src/engine/firewall.rs @@ -18,6 +18,7 @@ use crate::api::Ports; pub use crate::api::ProtoFilter; use crate::api::RemFwRuleReq; 
use crate::api::SetFwRulesReq; +use crate::api::stat::*; use crate::engine::overlay::ACTION_META_VNI; use alloc::string::ToString; use core::num::NonZeroU32; @@ -60,7 +61,9 @@ pub fn setup( // allow. let actions = LayerActions { default_in: DefaultAction::Deny, + default_in_stat_id: Some(FW_DEFAULT_IN), default_out: DefaultAction::StatefulAllow, + default_out_stat_id: Some(FW_DEFAULT_OUT), ..Default::default() }; diff --git a/lib/oxide-vpc/src/engine/gateway/mod.rs b/lib/oxide-vpc/src/engine/gateway/mod.rs index 21076948..6b1290ba 100644 --- a/lib/oxide-vpc/src/engine/gateway/mod.rs +++ b/lib/oxide-vpc/src/engine/gateway/mod.rs @@ -77,6 +77,7 @@ use opte::engine::rule::MetaAction; use opte::engine::rule::ModMetaResult; use opte::engine::rule::Rule; use opte::engine::rule::StaticAction; +use crate::api::stat::*; pub mod arp; pub mod dhcp; @@ -105,7 +106,9 @@ pub fn setup( // for inbound traffic to be that of the gateway. let actions = LayerActions { default_in: DefaultAction::Deny, + default_in_stat_id: Some(GATEWAY_NOSPOOF_IN), default_out: DefaultAction::Deny, + default_out_stat_id: Some(GATEWAY_NOSPOOF_OUT), ..Default::default() }; diff --git a/lib/oxide-vpc/src/engine/nat.rs b/lib/oxide-vpc/src/engine/nat.rs index 698ae14d..ce202fa1 100644 --- a/lib/oxide-vpc/src/engine/nat.rs +++ b/lib/oxide-vpc/src/engine/nat.rs @@ -47,6 +47,7 @@ use opte::engine::rule::Rule; use opte::engine::snat::ConcreteIpAddr; use opte::engine::snat::SNat; use uuid::Uuid; +use crate::api::stat::*; pub const NAT_LAYER_NAME: &str = "nat"; const FLOATING_ONE_TO_ONE_NAT_PRIORITY: u16 = 5; @@ -102,7 +103,9 @@ pub fn setup( // be forwarded to boundary services.
let actions = LayerActions { default_in: DefaultAction::Allow, + default_in_stat_id: Some(NAT_NONE), default_out: DefaultAction::Allow, + default_out_stat_id: Some(NAT_NONE), ..Default::default() }; @@ -289,7 +292,7 @@ fn setup_ipv4_nat( for igw_id in igw_matches { let mut rule = - Rule::new(SNAT_PRIORITY, Action::Stateful(snat.clone())); + Rule::new_with_id(SNAT_PRIORITY, Action::Stateful(snat.clone()), Some(NAT_SNAT_V4)); rule.add_predicate(Predicate::InnerEtherType(vec![ EtherTypeMatch::Exact(ETHER_TYPE_IPV4), @@ -438,7 +441,7 @@ fn setup_ipv6_nat( for igw_id in igw_matches { let mut rule = - Rule::new(SNAT_PRIORITY, Action::Stateful(snat.clone())); + Rule::new_with_id(SNAT_PRIORITY, Action::Stateful(snat.clone()), Some(NAT_SNAT_V6)); rule.add_predicate(Predicate::InnerEtherType(vec![ EtherTypeMatch::Exact(ETHER_TYPE_IPV6), diff --git a/lib/oxide-vpc/src/engine/router.rs b/lib/oxide-vpc/src/engine/router.rs index e7c12f0f..878fd077 100644 --- a/lib/oxide-vpc/src/engine/router.rs +++ b/lib/oxide-vpc/src/engine/router.rs @@ -48,6 +48,7 @@ use opte::engine::rule::MetaAction; use opte::engine::rule::ModMetaResult; use opte::engine::rule::Rule; use uuid::Uuid; +use crate::api::stat::*; pub const ROUTER_LAYER_NAME: &str = "router"; @@ -259,6 +260,7 @@ pub fn setup( let actions = LayerActions { default_in: DefaultAction::Allow, default_out: DefaultAction::Deny, + default_out_stat_id: Some(ROUTER_NOROUTE), ..Default::default() }; From d084d3302e2acceda9b978fa5107da5fb3b17adb Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 2 May 2025 17:39:57 +0100 Subject: [PATCH 07/37] Style. 
--- bin/opteadm/src/bin/opteadm.rs | 4 ++-- lib/opte-ioctl/src/lib.rs | 5 ++++- lib/oxide-vpc/src/api/stat.rs | 2 +- lib/oxide-vpc/src/engine/gateway/mod.rs | 2 +- lib/oxide-vpc/src/engine/nat.rs | 16 +++++++++++----- lib/oxide-vpc/src/engine/router.rs | 2 +- xde/src/xde.rs | 9 +++++++-- 7 files changed, 27 insertions(+), 13 deletions(-) diff --git a/bin/opteadm/src/bin/opteadm.rs b/bin/opteadm/src/bin/opteadm.rs index 6a3e4171..f3391d6a 100644 --- a/bin/opteadm/src/bin/opteadm.rs +++ b/bin/opteadm/src/bin/opteadm.rs @@ -283,7 +283,7 @@ enum Command { /// The OPTE port to read... #[arg(short)] port: String, - } + }, } #[derive(Debug, Parser)] @@ -870,7 +870,7 @@ fn main() -> anyhow::Result<()> { // XXX TEMP Command::DumpFlowStats { port } => { - let DumpFlowStatsResp{ data } = hdl.dump_flowstats(&port)?; + let DumpFlowStatsResp { data } = hdl.dump_flowstats(&port)?; println!("{data}"); } } diff --git a/lib/opte-ioctl/src/lib.rs b/lib/opte-ioctl/src/lib.rs index 17cef73a..3dbf212a 100644 --- a/lib/opte-ioctl/src/lib.rs +++ b/lib/opte-ioctl/src/lib.rs @@ -371,7 +371,10 @@ impl OpteHdl { } /// TEMP METHOD - pub fn dump_flowstats(&self, port_name: &str) -> Result { + pub fn dump_flowstats( + &self, + port_name: &str, + ) -> Result { let cmd = OpteCmd::DumpFlowStats; run_cmd_ioctl( self.device.as_raw_fd(), diff --git a/lib/oxide-vpc/src/api/stat.rs b/lib/oxide-vpc/src/api/stat.rs index 20cfcb00..b88abede 100644 --- a/lib/oxide-vpc/src/api/stat.rs +++ b/lib/oxide-vpc/src/api/stat.rs @@ -30,4 +30,4 @@ pub static NAT_VALID_IGW_V4: Uuid = pub static NAT_VALID_IGW_V6: Uuid = Uuid::from_fields(0x01de_f00d, 0x7777, 0x0003, &3u64.to_be_bytes()); pub static NAT_NONE: Uuid = - Uuid::from_fields(0x01de_f00d, 0x7777, 0x0003, &255u64.to_be_bytes()); \ No newline at end of file + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0003, &255u64.to_be_bytes()); diff --git a/lib/oxide-vpc/src/engine/gateway/mod.rs b/lib/oxide-vpc/src/engine/gateway/mod.rs index 6b1290ba..b24e6e50 100644 --- 
a/lib/oxide-vpc/src/engine/gateway/mod.rs +++ b/lib/oxide-vpc/src/engine/gateway/mod.rs @@ -42,6 +42,7 @@ use crate::api::DhcpCfg; use crate::api::MacAddr; +use crate::api::stat::*; use crate::cfg::Ipv4Cfg; use crate::cfg::Ipv6Cfg; use crate::cfg::VpcCfg; @@ -77,7 +78,6 @@ use opte::engine::rule::MetaAction; use opte::engine::rule::ModMetaResult; use opte::engine::rule::Rule; use opte::engine::rule::StaticAction; -use crate::api::stat::*; pub mod arp; pub mod dhcp; diff --git a/lib/oxide-vpc/src/engine/nat.rs b/lib/oxide-vpc/src/engine/nat.rs index ce202fa1..8fc14063 100644 --- a/lib/oxide-vpc/src/engine/nat.rs +++ b/lib/oxide-vpc/src/engine/nat.rs @@ -10,6 +10,7 @@ use super::router::RouterTargetClass; use super::router::RouterTargetInternal; use crate::api::ExternalIpCfg; use crate::api::SetExternalIpsReq; +use crate::api::stat::*; use crate::cfg::IpCfg; use crate::cfg::Ipv4Cfg; use crate::cfg::Ipv6Cfg; @@ -47,7 +48,6 @@ use opte::engine::rule::Rule; use opte::engine::snat::ConcreteIpAddr; use opte::engine::snat::SNat; use uuid::Uuid; -use crate::api::stat::*; pub const NAT_LAYER_NAME: &str = "nat"; const FLOATING_ONE_TO_ONE_NAT_PRIORITY: u16 = 5; @@ -291,8 +291,11 @@ fn setup_ipv4_nat( let snat = Arc::new(snat); for igw_id in igw_matches { - let mut rule = - Rule::new_with_id(SNAT_PRIORITY, Action::Stateful(snat.clone()), Some(NAT_SNAT_V4)); + let mut rule = Rule::new_with_id( + SNAT_PRIORITY, + Action::Stateful(snat.clone()), + Some(NAT_SNAT_V4), + ); rule.add_predicate(Predicate::InnerEtherType(vec![ EtherTypeMatch::Exact(ETHER_TYPE_IPV4), @@ -440,8 +443,11 @@ fn setup_ipv6_nat( let snat = Arc::new(snat); for igw_id in igw_matches { - let mut rule = - Rule::new_with_id(SNAT_PRIORITY, Action::Stateful(snat.clone()), Some(NAT_SNAT_V6)); + let mut rule = Rule::new_with_id( + SNAT_PRIORITY, + Action::Stateful(snat.clone()), + Some(NAT_SNAT_V6), + ); rule.add_predicate(Predicate::InnerEtherType(vec![ EtherTypeMatch::Exact(ETHER_TYPE_IPV6), diff --git 
a/lib/oxide-vpc/src/engine/router.rs b/lib/oxide-vpc/src/engine/router.rs index 878fd077..fbd34458 100644 --- a/lib/oxide-vpc/src/engine/router.rs +++ b/lib/oxide-vpc/src/engine/router.rs @@ -13,6 +13,7 @@ use super::firewall as fw; use crate::api::DelRouterEntryResp; use crate::api::RouterClass; use crate::api::RouterTarget; +use crate::api::stat::*; use crate::cfg::VpcCfg; use alloc::string::String; use alloc::string::ToString; @@ -48,7 +49,6 @@ use opte::engine::rule::MetaAction; use opte::engine::rule::ModMetaResult; use opte::engine::rule::Rule; use uuid::Uuid; -use crate::api::stat::*; pub const ROUTER_LAYER_NAME: &str = "router"; diff --git a/xde/src/xde.rs b/xde/src/xde.rs index fc6f5f82..8b4107d2 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -694,11 +694,16 @@ unsafe extern "C" fn xde_ioc_opte_cmd(karg: *mut c_void, mode: c_int) -> c_int { } #[unsafe(no_mangle)] -fn flow_stats_hdlr(env: &mut IoctlEnvelope) -> Result { +fn flow_stats_hdlr( + env: &mut IoctlEnvelope, +) -> Result { let req: oxide_vpc::api::DumpUftReq = env.copy_in_req()?; let devs = xde_devs().read(); match devs.get_by_name(&req.port_name) { - Some(dev) => dev.port.dump_flow_stats().map(|data| oxide_vpc::api::DumpFlowStatsResp {data}), + Some(dev) => dev + .port + .dump_flow_stats() + .map(|data| oxide_vpc::api::DumpFlowStatsResp { data }), None => Err(OpteError::PortNotFound(req.port_name)), } } From 4b71596e28af79c924610a5fc3fa5364677fd273 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 2 May 2025 18:02:03 +0100 Subject: [PATCH 08/37] Ioctl does not need engine. 
--- lib/opte/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/opte/src/lib.rs b/lib/opte/src/lib.rs index 2838a5e9..c19f8eb4 100644 --- a/lib/opte/src/lib.rs +++ b/lib/opte/src/lib.rs @@ -31,6 +31,7 @@ extern crate self as opte; use alloc::boxed::Box; use core::fmt; use core::fmt::Display; +#[cfg(any(feature = "engine", test))] use engine::stat::StatTree; pub use ingot; @@ -258,6 +259,7 @@ pub struct ExecCtx { pub log: Box, } +#[cfg(any(feature = "engine", test))] pub(crate) struct ExecCtx2<'a> { pub user_ctx: &'a ExecCtx, pub stats: &'a mut StatTree, From 6880f6417bc1622bc6ad9dd575962cd631147621 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 12 Jun 2025 12:09:06 +0100 Subject: [PATCH 09/37] More fixups. --- xde/src/xde.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 2782dd2d..0798b055 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -697,12 +697,14 @@ unsafe extern "C" fn xde_ioc_opte_cmd(karg: *mut c_void, mode: c_int) -> c_int { } } +// TODO: this is just sufficient for a demo. Develop the actual interface. #[unsafe(no_mangle)] fn flow_stats_hdlr( env: &mut IoctlEnvelope, ) -> Result { let req: oxide_vpc::api::DumpUftReq = env.copy_in_req()?; - let devs = xde_devs().read(); + let state = get_xde_state(); + let devs = state.devs.read(); match devs.get_by_name(&req.port_name) { Some(dev) => dev .port From bd3353f1ad09b476fbc107ea5db0f4728731a16e Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 12 Jun 2025 16:45:30 +0100 Subject: [PATCH 10/37] Break TableStat -> Root & Intermediate This should make it more clear at each stage which types are being used where. This also reduces struct sizes for each, and in a given `parent`s list we take no more space than an Arc would. It may be worth applying the same to `children`.
--- lib/opte/src/engine/layer.rs | 39 +++--- lib/opte/src/engine/stat.rs | 244 ++++++++++++++++++++++++----------- 2 files changed, 190 insertions(+), 93 deletions(-) diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index d774c138..49fe62d6 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -29,8 +29,9 @@ use super::rule::GenBtError; use super::rule::HdrTransformError; use super::rule::Rule; use super::rule::ht_probe; +use super::stat::IntermediateStat; +use super::stat::RootStat; use super::stat::StatTree; -use super::stat::TableStat; use crate::ExecCtx; use crate::ExecCtx2; use crate::LogLevel; @@ -164,7 +165,7 @@ pub enum LftError { #[derive(Clone, Debug)] struct LftInEntry { action_desc: ActionDescEntry, - stat: Arc, + stat: Arc, } impl Display for LftInEntry { @@ -185,7 +186,7 @@ impl Dump for LftInEntry { struct LftOutEntry { in_flow_pair: InnerFlowId, action_desc: ActionDescEntry, - stat: Arc, + stat: Arc, } impl LftOutEntry { @@ -227,7 +228,7 @@ impl LayerFlowTable { action_desc: ActionDescEntry, in_flow: InnerFlowId, out_flow: InnerFlowId, - stat: Arc, + stat: Arc, ) { // We add unchekced because the limit is now enforced by // LayerFlowTable, not the individual flow tables. @@ -368,10 +369,10 @@ enum EntryState { /// No flow entry was found matching a given flowid. None, /// An existing flow table entry was found. - Clean(ActionDescEntry, Arc), + Clean(ActionDescEntry, Arc), /// An existing flow table entry was found, but rule processing must be rerun /// to use the original action or invalidate the underlying entry. - Dirty(ActionDescEntry, Arc), + Dirty(ActionDescEntry, Arc), } /// The default action of a layer. 
@@ -538,10 +539,10 @@ pub struct Layer { actions: Vec, default_in: DefaultAction, default_in_hits: u64, - default_in_stat: Arc, + default_in_stat: Arc, default_out: DefaultAction, default_out_hits: u64, - default_out_stat: Arc, + default_out_stat: Arc, ft: LayerFlowTable, ft_cstr: CString, rules_in: RuleTable, @@ -971,8 +972,9 @@ impl Layer { }); } - let stat = - ectx.stats.new_intermediate(vec![stat.take().unwrap()]); + let stat = ectx + .stats + .new_intermediate(vec![stat.take().unwrap().into()]); pkt.meta_mut().stats.push(stat.clone()); // The outbound flow ID mirrors the inbound. Remember, @@ -1134,8 +1136,9 @@ impl Layer { } } - let stat = - ectx.stats.new_intermediate(vec![stat.take().unwrap()]); + let stat = ectx + .stats + .new_intermediate(vec![stat.take().unwrap().into()]); pkt.meta_mut().stats.push(stat.clone()); // The outbound flow ID must be calculated _after_ the @@ -1291,8 +1294,9 @@ impl Layer { }); } - let stat = - ectx.stats.new_intermediate(vec![stat.take().unwrap()]); + let stat = ectx + .stats + .new_intermediate(vec![stat.take().unwrap().into()]); pkt.meta_mut().stats.push(stat.clone()); // The inbound flow ID must be calculated _after_ the @@ -1417,8 +1421,9 @@ impl Layer { }); } - let stat = - ectx.stats.new_intermediate(vec![stat.take().unwrap()]); + let stat = ectx + .stats + .new_intermediate(vec![stat.take().unwrap().into()]); pkt.meta_mut().stats.push(stat.clone()); let desc = match action.gen_desc(pkt.flow(), pkt, ameta) { @@ -1652,7 +1657,7 @@ struct RuleTableEntry { id: RuleId, hits: u64, rule: Rule, - stat: Arc, + stat: Arc, } impl From<&RuleTableEntry> for RuleTableEntryDump { diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index 42f54f02..4f778902 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -55,8 +55,8 @@ pub struct FlowStat { /// The other half of this flow. pub partner: InnerFlowId, /// `TableStat`s to whom we must return our own `stats`. 
- pub parents: Vec>, - /// The cached list of IDs of root `TableStat` entries. + pub parents: Box<[StatParent]>, + /// The cached list of IDs of reachable `RootStat` entries. pub bases: BTreeSet, /// Actual stats associated with this flow. @@ -101,17 +101,102 @@ impl From<&FlowStat> for ApiFlowStat { } } -pub struct TableStat { - pub id: Option, +#[derive(Clone, Debug)] +pub enum StatParent { + Root(Arc), + Intermadiate(Arc), +} + +impl From> for StatParent { + fn from(value: Arc) -> Self { + Self::Root(value) + } +} + +impl From> for StatParent { + fn from(value: Arc) -> Self { + Self::Intermadiate(value) + } +} + +impl StatParent { + fn parents(&self) -> &[StatParent] { + match self { + Self::Root(_) => &[], + Self::Intermadiate(i) => &i.parents, + } + } + + fn global_id(&self) -> StatId { + self.inner().stats.id() + } + + fn root_id(&self) -> Option<&Uuid> { + match self { + Self::Root(r) => Some(&r.id), + Self::Intermadiate(_) => None, + } + } + + fn inner(&self) -> &TableStat { + match self { + Self::Root(r) => &r.body, + Self::Intermadiate(i) => &i.body, + } + } + + /// Allow a packet which will track local stats via a UFT entry. + pub fn allow(&self) { + self.inner().allow(); + } + + /// Allow a packet (at a given timestamp) which will track local stats via + /// a UFT entry. + pub fn allow_at(&self, time: Moment) { + self.inner().allow_at(time); + } + + /// Record an action for a packet which will ultimately be dropped or + /// hairpinned. + pub fn act(&self, action: Action, pkt_size: u64, direction: Direction) { + self.inner().act(action, pkt_size, direction); + } + + /// Record an action for a packet (at a given time) which will ultimately + /// be dropped or hairpinned. 
+ pub fn act_at( + &self, + action: Action, + pkt_size: u64, + direction: Direction, + time: Moment, + ) { + self.inner().act_at(action, pkt_size, direction, time); + } +} + +#[derive(Debug)] +pub struct RootStat { + pub id: Uuid, + body: TableStat, +} + +#[derive(Debug)] +pub struct IntermediateStat { + pub parents: Box<[StatParent]>, + body: TableStat, +} - pub parents: Vec>, - pub children: KRwLock>>, +struct TableStat { + /// A list of other stat-related objects who name this table + /// stat as one of its parents. + children: KRwLock>>, /// The actual stats! - pub stats: FullCounter, + stats: FullCounter, /// When was this flow last updated? - pub last_hit: AtomicU64, + last_hit: AtomicU64, } impl core::fmt::Debug for TableStat { @@ -121,27 +206,20 @@ impl core::fmt::Debug for TableStat { } impl TableStat { - /// Allow a packet which will track local stats via a UFT entry. - pub fn allow(&self) { + fn allow(&self) { self.allow_at(Moment::now()); } - /// Allow a packet (at a given timestamp) which will track local stats via - /// a UFT entry. - pub fn allow_at(&self, time: Moment) { + fn allow_at(&self, time: Moment) { self.last_hit.store(time.raw(), Ordering::Relaxed); self.stats.allow.fetch_add(1, Ordering::Relaxed); } - /// Record an action for a packet which will ultimately be dropped or - /// hairpinned. - pub fn act(&self, action: Action, pkt_size: u64, direction: Direction) { + fn act(&self, action: Action, pkt_size: u64, direction: Direction) { self.act_at(action, pkt_size, direction, Moment::now()); } - /// Record an action for a packet (at a given time) which will ultimately - /// be dropped or hairpinned. 
- pub fn act_at( + fn act_at( &self, action: Action, pkt_size: u64, @@ -278,10 +356,18 @@ impl FoldStat for FlowStat { } } -impl FoldStat for TableStat { +impl FoldStat for IntermediateStat { fn fold(&self, into: &FullCounter, visited: &mut BTreeSet) { - if !visited.insert(self.stats.id()) { - self.stats.combine(into); + if !visited.insert(self.body.stats.id()) { + self.body.stats.combine(into); + } + } +} + +impl FoldStat for RootStat { + fn fold(&self, into: &FullCounter, visited: &mut BTreeSet) { + if !visited.insert(self.body.stats.id()) { + self.body.stats.combine(into); } } } @@ -292,8 +378,8 @@ impl FoldStat for TableStat { #[derive(Default)] pub struct StatTree { next_id: u64, - roots: BTreeMap>, - intermediate: Vec>, + roots: BTreeMap>, + intermediate: Vec>, flows: BTreeMap>, } @@ -301,19 +387,20 @@ impl StatTree { /// Gets or creates the root stat for a given UUID. /// /// Allocates a new UUID if none is provided. - pub fn root(&mut self, uuid: Option) -> Arc { + pub fn root(&mut self, uuid: Option) -> Arc { let uuid = uuid.unwrap_or_else(|| Uuid::from_u64_pair(0, self.next_id)); let ids = &mut self.next_id; self.roots .entry(uuid) - .or_insert_with_key(|id| { - Arc::new(TableStat { - id: Some(*id), - parents: vec![], - children: KRwLock::new(vec![]), - stats: FullCounter::from_next_id(ids), - last_hit: Moment::now().raw().into(), + .or_insert_with(|| { + Arc::new(RootStat { + id: uuid, + body: TableStat { + children: KRwLock::new(vec![]), + stats: FullCounter::from_next_id(ids), + last_hit: Moment::now().raw().into(), + }, }) }) .clone() @@ -321,18 +408,19 @@ impl StatTree { pub fn new_intermediate( &mut self, - parents: Vec>, - ) -> Arc { - let out = Arc::new(TableStat { - id: None, - parents, - children: KRwLock::new(vec![]), - stats: FullCounter::from_next_id(&mut self.next_id), - last_hit: Moment::now().raw().into(), + parents: Vec, + ) -> Arc { + let out = Arc::new(IntermediateStat { + parents: parents.into(), + body: TableStat { + children: 
KRwLock::new(vec![]), + stats: FullCounter::from_next_id(&mut self.next_id), + last_hit: Moment::now().raw().into(), + }, }); for parent in &out.parents { - let mut p_children = parent.children.write(); + let mut p_children = parent.inner().children.write(); let weak = Arc::downgrade(&out); p_children.push(weak); } @@ -347,7 +435,7 @@ impl StatTree { flow_id: &InnerFlowId, partner_flow: &InnerFlowId, dir: Direction, - parents: Vec>, + parents: Vec, ) -> Arc { if let Entry::Occupied(e) = self.flows.entry(*flow_id) { // TODO: what to do with (maybe new) parents & bases?! @@ -356,6 +444,7 @@ impl StatTree { return e.get().clone(); } + let parents = parents.into_boxed_slice(); let bases = get_base_ids(&parents); let out = match self.flows.entry(*partner_flow) { @@ -397,7 +486,7 @@ impl StatTree { const ROOT_EXPIRY_WINDOW: Ttl = Ttl::new_seconds(100); #[derive(Default, Eq, PartialEq)] - enum Hmm { + enum Liveness { #[default] NotSeen, SeenKeep, @@ -405,13 +494,13 @@ impl StatTree { } #[derive(Default)] - struct Aa { - lhs: Hmm, - rhs: Hmm, + struct JointLive { + lhs: Liveness, + rhs: Liveness, } // Flows -- need to account for shared component between arc'd things. 
- let mut possibly_expired: BTreeMap = BTreeMap::new(); + let mut possibly_expired: BTreeMap = BTreeMap::new(); for (k, v) in &self.flows { let t_hit = Moment::from_raw_nanos(v.last_hit.load(Ordering::Relaxed)); @@ -421,23 +510,23 @@ impl StatTree { let el = possibly_expired.entry(base_id).or_default(); match (v.dir, can_remove) { (Direction::In, false) => { - el.lhs = Hmm::SeenKeep; + el.lhs = Liveness::SeenKeep; } (Direction::Out, false) => { - el.rhs = Hmm::SeenKeep; + el.rhs = Liveness::SeenKeep; } (Direction::In, true) => { - el.lhs = Hmm::Seen(*k); + el.lhs = Liveness::Seen(*k); } (Direction::Out, true) => { - el.rhs = Hmm::Seen(*k); + el.rhs = Liveness::Seen(*k); } } } for v in possibly_expired.values() { - let cannot_remove = v.lhs == Hmm::SeenKeep - || v.rhs == Hmm::SeenKeep - || (v.lhs == Hmm::NotSeen && v.rhs == Hmm::NotSeen); + let cannot_remove = v.lhs == Liveness::SeenKeep + || v.rhs == Liveness::SeenKeep + || (v.lhs == Liveness::NotSeen && v.rhs == Liveness::NotSeen); if cannot_remove { continue; } @@ -445,7 +534,7 @@ impl StatTree { #[allow(clippy::mutable_key_type)] let mut parents: BTreeSet = Default::default(); let mut base_stats = None; - if let Hmm::Seen(id) = v.lhs { + if let Liveness::Seen(id) = v.lhs { if let Some(flow) = self.flows.remove(&id) { let flow = Arc::into_inner(flow) .expect("strong count 1 is enforced above"); @@ -455,7 +544,7 @@ impl StatTree { base_stats = Some(flow.shared); } } - if let Hmm::Seen(id) = v.rhs { + if let Liveness::Seen(id) = v.rhs { if let Some(flow) = self.flows.remove(&id) { let flow = Arc::into_inner(flow) .expect("strong count 1 is enforced above"); @@ -470,7 +559,7 @@ impl StatTree { let base_stats = base_stats.expect("should not have no parent here!!"); for parent in parents { - base_stats.stats.combine(&parent.0.stats.packets); + base_stats.stats.combine(&parent.0.inner().stats.packets); } } @@ -479,7 +568,7 @@ impl StatTree { // Time is... not relevant here. The LFTs are GONE. 
if Arc::strong_count(v) == 1 { for p in &v.parents { - v.stats.combine(&p.stats); + v.body.stats.combine(&p.inner().stats); } false } else { @@ -491,7 +580,7 @@ impl StatTree { // same ID come and go in adjacent control plane operations... self.roots.retain(|_, v| { let t_hit = - Moment::from_raw_nanos(v.last_hit.load(Ordering::Relaxed)); + Moment::from_raw_nanos(v.body.last_hit.load(Ordering::Relaxed)); Arc::strong_count(v) > 1 || !ROOT_EXPIRY_WINDOW.is_expired(t_hit, now) }); @@ -502,16 +591,19 @@ impl StatTree { let mut out = String::new(); out.push_str("--Roots--\n"); for (id, root) in &self.roots { - let d = ApiFullCounter::from(&root.stats); - out.push_str(&format!("\t{:?}/{id} -> {d:?}\n", root.stats.id())); + let d = ApiFullCounter::from(&root.body.stats); + out.push_str(&format!( + "\t{:?}/{id} -> {d:?}\n", + root.body.stats.id() + )); } out.push_str("----\n\n"); out.push_str("--Ints--\n"); for root in &self.intermediate { - let d = ApiFullCounter::from(&root.stats); - out.push_str(&format!("\t{:?} -> {d:?}\n", root.stats.id())); + let d = ApiFullCounter::from(&root.body.stats); + out.push_str(&format!("\t{:?} -> {d:?}\n", root.body.stats.id())); let parents: Vec> = - root.parents.iter().map(|v| v.id).collect(); + root.parents.iter().map(|v| v.root_id().copied()).collect(); out.push_str(&format!("\t\tparents {parents:?}\n\n")); } out.push_str("----\n\n"); @@ -520,7 +612,7 @@ impl StatTree { // let d: ApiFlowStat = stat.as_ref().into(); let d: ApiPktCounter = (&stat.as_ref().shared.stats).into(); let parents: Vec<_> = - stat.parents.iter().map(|v| v.stats.id()).collect(); + stat.parents.iter().map(|v| v.global_id()).collect(); out.push_str(&format!("\t{id}/{} ->\n", stat.dir)); out.push_str(&format!("\t\t{:?} {d:?}\n", stat.shared.stats.id)); out.push_str(&format!("\t\tparents {:?}\n", parents)); @@ -531,14 +623,14 @@ impl StatTree { } } -fn get_base_ids(parents: &[Arc]) -> BTreeSet { +fn get_base_ids(parents: &[StatParent]) -> BTreeSet { let mut out = 
BTreeSet::new(); let mut work_set = parents.to_vec(); while let Some(el) = work_set.pop() { - work_set.extend_from_slice(&el.parents); - if let Some(id) = el.id { - out.insert(id); + work_set.extend_from_slice(el.parents()); + if let Some(id) = el.root_id() { + out.insert(*id); } } @@ -547,7 +639,7 @@ fn get_base_ids(parents: &[Arc]) -> BTreeSet { /// XXX holds stats as they arrive on a packet. pub struct FlowStatBuilder { - parents: Vec>, + parents: Vec, layer_end: usize, } @@ -561,8 +653,8 @@ impl FlowStatBuilder { } /// Push a parent onto this flow. - pub fn push(&mut self, parent: Arc) { - self.parents.push(parent); + pub fn push(&mut self, parent: impl Into) { + self.parents.push(parent.into()); } /// Mark all current parents as [`Action::Allow`]. @@ -577,7 +669,7 @@ impl FlowStatBuilder { pkt_size: u64, direction: Direction, create_flow: bool, - ) -> Option>> { + ) -> Option> { match action { Action::Allow if create_flow => { self.parents.iter().for_each(|v| v.allow()); @@ -612,7 +704,7 @@ impl Default for FlowStatBuilder { } } -struct ById(Arc); +struct ById(StatParent); impl PartialOrd for ById { fn partial_cmp(&self, other: &Self) -> Option { @@ -622,13 +714,13 @@ impl PartialOrd for ById { impl Ord for ById { fn cmp(&self, other: &Self) -> core::cmp::Ordering { - self.0.stats.id().cmp(&other.0.stats.id()) + self.0.global_id().cmp(&other.0.global_id()) } } impl PartialEq for ById { fn eq(&self, other: &Self) -> bool { - self.0.stats.id() == other.0.stats.id() + self.0.global_id() == other.0.global_id() } } From 460c2d9104a356091238b1f30fc84af2268bce27 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 12 Jun 2025 17:41:50 +0100 Subject: [PATCH 11/37] Goodbye to Dynamic Dispatch --- lib/opte/src/engine/stat.rs | 44 +++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index 4f778902..ffdc8a2e 100644 --- a/lib/opte/src/engine/stat.rs +++ 
b/lib/opte/src/engine/stat.rs @@ -104,7 +104,7 @@ impl From<&FlowStat> for ApiFlowStat { #[derive(Clone, Debug)] pub enum StatParent { Root(Arc), - Intermadiate(Arc), + Intermediate(Arc), } impl From> for StatParent { @@ -115,7 +115,7 @@ impl From> for StatParent { impl From> for StatParent { fn from(value: Arc) -> Self { - Self::Intermadiate(value) + Self::Intermediate(value) } } @@ -123,7 +123,7 @@ impl StatParent { fn parents(&self) -> &[StatParent] { match self { Self::Root(_) => &[], - Self::Intermadiate(i) => &i.parents, + Self::Intermediate(i) => &i.parents, } } @@ -134,14 +134,14 @@ impl StatParent { fn root_id(&self) -> Option<&Uuid> { match self { Self::Root(r) => Some(&r.id), - Self::Intermadiate(_) => None, + Self::Intermediate(_) => None, } } fn inner(&self) -> &TableStat { match self { Self::Root(r) => &r.body, - Self::Intermadiate(i) => &i.body, + Self::Intermediate(i) => &i.body, } } @@ -173,6 +173,30 @@ impl StatParent { ) { self.inner().act_at(action, pkt_size, direction, time); } + + /// Add a weak child reference to this stat object. + pub fn append_child(&self, child: impl Into) { + let mut p_children = self.inner().children.write(); + p_children.push(child.into()); + } +} + +#[derive(Clone, Debug)] +pub enum StatChild { + Intermediate(Weak), + Flow(Weak), +} + +impl From<&Arc> for StatChild { + fn from(value: &Arc) -> Self { + Self::Intermediate(Arc::downgrade(value)) + } +} + +impl From<&Arc> for StatChild { + fn from(value: &Arc) -> Self { + Self::Flow(Arc::downgrade(value)) + } } #[derive(Debug)] @@ -190,7 +214,7 @@ pub struct IntermediateStat { struct TableStat { /// A list of other stat-related objects who name this table /// stat as one of its parents. - children: KRwLock>>, + children: KRwLock>, /// The actual stats! 
stats: FullCounter, @@ -420,9 +444,7 @@ impl StatTree { }); for parent in &out.parents { - let mut p_children = parent.inner().children.write(); - let weak = Arc::downgrade(&out); - p_children.push(weak); + parent.append_child(&out); } self.intermediate.push(out.clone()); @@ -475,6 +497,10 @@ impl StatTree { } }; + for parent in &out.parents { + parent.append_child(&out); + } + // Proven a miss on flow_id already let _ = self.flows.insert(*flow_id, out.clone()); out From 4a9ff9b806288a09721890873778c9037a1ed2c0 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 12 Jun 2025 17:54:20 +0100 Subject: [PATCH 12/37] Ah, XDE. --- lib/opte/src/engine/stat.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index ffdc8a2e..e717e07b 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -10,6 +10,7 @@ use crate::api::InnerFlowId; use crate::ddi::sync::KRwLock; use crate::ddi::time::Moment; use crate::engine::flow_table::Ttl; +use alloc::boxed::Box; use alloc::collections::BTreeMap; use alloc::collections::BTreeSet; use alloc::collections::btree_map::Entry; From 20e843244a3dd27bbd954902d40fbeb1fd478f1b Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 17 Jun 2025 14:19:50 +0100 Subject: [PATCH 13/37] Properly separate providers from the rest of port context. 
--- crates/opte-api/src/lib.rs | 2 +- lib/opte-test-utils/src/lib.rs | 5 +- lib/opte/src/engine/layer.rs | 20 +++---- lib/opte/src/engine/mod.rs | 12 +++- lib/opte/src/engine/port/mod.rs | 18 +++--- lib/opte/src/lib.rs | 98 +-------------------------------- lib/opte/src/provider.rs | 87 +++++++++++++++++++++++++++++ xde/src/xde.rs | 8 +-- 8 files changed, 127 insertions(+), 123 deletions(-) create mode 100644 lib/opte/src/provider.rs diff --git a/crates/opte-api/src/lib.rs b/crates/opte-api/src/lib.rs index 1f837ea5..a2032de7 100644 --- a/crates/opte-api/src/lib.rs +++ b/crates/opte-api/src/lib.rs @@ -53,7 +53,7 @@ pub use ulp::*; /// /// We rely on CI and the check-api-version.sh script to verify that /// this number is incremented anytime the oxide-api code changes. -pub const API_VERSION: u64 = 37; +pub const API_VERSION: u64 = 38; /// Major version of the OPTE package. pub const MAJOR_VERSION: u64 = 0; diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index a7a340cb..79ce78c3 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -16,7 +16,6 @@ pub mod pcap; pub mod port_state; // Let's make our lives easier and pub use a bunch of stuff. 
-pub use opte::ExecCtx; pub use opte::api::Direction::*; pub use opte::api::MacAddr; pub use opte::ddi::mblk::MsgBlk; @@ -63,6 +62,7 @@ pub use opte::ingot::types::Emit; pub use opte::ingot::types::EmitDoesNotRelyOnBufContents; pub use opte::ingot::types::HeaderLen; pub use opte::ingot::udp::Udp; +pub use opte::provider::Providers; pub use oxide_vpc::api::AddFwRuleReq; pub use oxide_vpc::api::BOUNDARY_SERVICES_VNI; pub use oxide_vpc::api::DhcpCfg; @@ -258,7 +258,8 @@ fn oxide_net_builder( v2b: Arc, ) -> PortBuilder { #[allow(clippy::arc_with_non_send_sync)] - let ectx = Arc::new(ExecCtx { log: Box::new(opte::PrintlnLog {}) }); + let ectx = + Arc::new(Providers { log: Box::new(opte::provider::PrintlnLog) }); let name_cstr = std::ffi::CString::new(name).unwrap(); let mut pb = PortBuilder::new(name, name_cstr, cfg.guest_mac, ectx); diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index 49fe62d6..bd93dc89 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -32,9 +32,6 @@ use super::rule::ht_probe; use super::stat::IntermediateStat; use super::stat::RootStat; use super::stat::StatTree; -use crate::ExecCtx; -use crate::ExecCtx2; -use crate::LogLevel; use crate::api::DumpLayerResp; use crate::d_error::DError; #[cfg(all(not(feature = "std"), not(test)))] @@ -44,6 +41,9 @@ use crate::ddi::kstat::KStatProvider; use crate::ddi::kstat::KStatU64; use crate::ddi::mblk::MsgBlk; use crate::ddi::time::Moment; +use crate::engine::ExecCtx; +use crate::provider::LogLevel; +use crate::provider::Providers; use alloc::ffi::CString; use alloc::string::String; use alloc::string::ToString; @@ -840,7 +840,7 @@ impl Layer { pub(crate) fn process( &mut self, - ectx: &mut ExecCtx2, + ectx: &mut ExecCtx, dir: Direction, pkt: &mut Packet, xforms: &mut Transforms, @@ -859,7 +859,7 @@ impl Layer { fn process_in( &mut self, - ectx: &mut ExecCtx2, + ectx: &mut ExecCtx, pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, @@ -933,7 
+933,7 @@ impl Layer { fn process_in_rules( &mut self, - ectx: &mut ExecCtx2, + ectx: &mut ExecCtx, pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, @@ -1181,7 +1181,7 @@ impl Layer { fn process_out( &mut self, - ectx: &mut ExecCtx2, + ectx: &mut ExecCtx, pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, @@ -1255,7 +1255,7 @@ impl Layer { fn process_out_rules( &mut self, - ectx: &mut ExecCtx2, + ectx: &mut ExecCtx, pkt: &mut Packet, xforms: &mut Transforms, ameta: &mut ActionMeta, @@ -1509,7 +1509,7 @@ impl Layer { fn record_gen_desc_failure( &self, - ectx: &ExecCtx, + ectx: &Providers, dir: Direction, flow: &InnerFlowId, err: &rule::GenDescError, @@ -1526,7 +1526,7 @@ impl Layer { fn record_gen_ht_failure( &self, - ectx: &ExecCtx, + ectx: &Providers, dir: Direction, flow: &InnerFlowId, err: &rule::GenHtError, diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index 08793d3e..f9dc366e 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -5,8 +5,7 @@ // Copyright 2025 Oxide Computer Company //! The engine in OPTE. -//! -//! All code under this namespace is guarded by the `engine` feature flag. + pub mod arp; pub mod checksum; pub mod dhcp; @@ -36,6 +35,7 @@ pub mod tcp_state; pub mod udp; use crate::ddi::mblk::MsgBlk; +use crate::provider::Providers; use checksum::Checksum; use ingot::tcp::TcpRef; use ingot::types::IntoBufPointer; @@ -48,6 +48,7 @@ use packet::Packet; use packet::Pullup; use parse::ValidNoEncap; use rule::CompiledTransform; +use stat::StatTree; use zerocopy::ByteSlice; use zerocopy::ByteSliceMut; @@ -141,6 +142,13 @@ use crate::engine::packet::InnerFlowId; use crate::engine::packet::ParseError; use crate::engine::port::UftEntry; +/// Context containing platform-specific providers and shared elements from a +/// [`port::Port`], used within layer and action execution. 
+pub struct ExecCtx<'a> { + pub user_ctx: &'a Providers, + pub stats: &'a mut StatTree, +} + /// The action to take for a single packet, based on the processing of /// the [`NetworkImpl::handle_pkt()`] callback. pub enum HdlPktAction { diff --git a/lib/opte/src/engine/port/mod.rs b/lib/opte/src/engine/port/mod.rs index 4db2354b..09a989af 100644 --- a/lib/opte/src/engine/port/mod.rs +++ b/lib/opte/src/engine/port/mod.rs @@ -6,6 +6,7 @@ //! A virtual switch port. +use super::ExecCtx; use super::HdlPktAction; use super::LightweightMeta; use super::NetworkImpl; @@ -51,8 +52,6 @@ use super::tcp::KEEPALIVE_EXPIRE_TTL; use super::tcp::TIME_WAIT_EXPIRE_TTL; use super::tcp_state::TcpFlowState; use super::tcp_state::TcpFlowStateError; -use crate::ExecCtx; -use crate::ExecCtx2; use crate::api::DumpLayerResp; use crate::api::DumpTcpFlowsResp; use crate::api::DumpUftResp; @@ -73,6 +72,7 @@ use crate::engine::flow_table::ExpiryPolicy; use crate::engine::packet::EmitSpec; use crate::engine::packet::PushSpec; use crate::engine::rule::CompiledEncap; +use crate::provider::Providers; use alloc::boxed::Box; use alloc::ffi::CString; use alloc::string::String; @@ -239,7 +239,7 @@ pub enum DropReason { /// Only the port builder may add or remove layers. Once you have a /// [`Port`] the list of layers is immutable. pub struct PortBuilder { - ectx: Arc, + ectx: Arc, name: String, // Cache the CString version of the name for use with DTrace // probes. @@ -411,7 +411,7 @@ impl PortBuilder { name: &str, name_cstr: CString, mac: MacAddr, - ectx: Arc, + ectx: Arc, ) -> Self { PortBuilder { name: name.to_string(), @@ -762,11 +762,13 @@ struct PortData { /// /// ### Execution Context /// -/// The `ExecCtx` provides implementations of specific features that -/// are valid for the given context the port is running in. +/// The `Providers` struct offers implementations of specific features that +/// are valid for the given context the port is running in (kernel, userland, ...). 
+/// This is combined with views of port specific fields in `ExecCtx`, which allows +/// layer/rule execution to access shared stats. pub struct Port { epoch: AtomicU64, - ectx: Arc, + ectx: Arc, name: String, // Cache the CString version of the name for use with DTrace // probes. @@ -2024,7 +2026,7 @@ impl Port { ameta: &mut ActionMeta, ) -> result::Result { let mut ectx = - ExecCtx2 { user_ctx: &self.ectx, stats: &mut data.flow_stats }; + ExecCtx { user_ctx: &self.ectx, stats: &mut data.flow_stats }; match dir { Direction::Out => { diff --git a/lib/opte/src/lib.rs b/lib/opte/src/lib.rs index be91099e..c50be535 100644 --- a/lib/opte/src/lib.rs +++ b/lib/opte/src/lib.rs @@ -28,12 +28,6 @@ extern crate cfg_if; // can use fully-qualified type paths. extern crate self as opte; -use alloc::boxed::Box; -use core::fmt; -use core::fmt::Display; -#[cfg(any(feature = "engine", test))] -use engine::stat::StatTree; - pub use ingot; #[cfg(any(feature = "api", test))] @@ -48,6 +42,8 @@ pub mod dynamic; pub mod engine; #[cfg(any(feature = "std", test))] pub mod print; +#[cfg(any(feature = "engine", test))] +pub mod provider; /// Return value with `bit` set. /// @@ -174,93 +170,3 @@ mod opte_provider { ) { } } - -// ================================================================ -// Providers -// -// Providers allow opte-core to work in different contexts (in theory) -// by allowing various implementations of core services to be plugged -// into the engine. For example, logging and stats can both be done as -// providers; providing implementations fit for in-kernel execution -// versus unit testing execution. Ideally we could get to a point -// where OPTE could also easily be stood up in userland (not that it -// is explicitly a goal, but only that the flexibility gives us better -// options for testing or unique production situations). However, this -// is the type of abstraction that can quickly grow out of control. 
If -// it doesn't serve an obvious purpose with at least two obvious -// implmentations, then it probably doesn't need to be a provider. -// -// XXX For now we stash providers here. This should probably move to -// dedicated module. -// ================================================================ - -/// A logging provider provides the means to log messages to some -/// destination based on the context in which OPTE is running. -/// -/// For example, in a unit test this could map to `println!`. In the -/// illumos kernel it would map to `cmn_err(9F)`. -/// -/// Logging levels are provided by [`LogLevel`]. These levels will map -/// to the underlying provider with varying degrees of success. -pub trait LogProvider: Send + Sync { - /// Log a message at the specified level. - fn log(&self, level: LogLevel, msg: &str); -} - -#[derive(Clone, Copy, Debug)] -pub enum LogLevel { - Note, - Warn, - Error, -} - -impl Display for LogLevel { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let level_s = match self { - Self::Note => "[NOTE]", - Self::Warn => "[WARN]", - Self::Error => "[ERROR]", - }; - write!(f, "{level_s}") - } -} - -#[cfg(any(feature = "std", test))] -#[derive(Clone, Copy)] -pub struct PrintlnLog {} - -#[cfg(any(feature = "std", test))] -impl LogProvider for PrintlnLog { - fn log(&self, level: LogLevel, msg: &str) { - println!("{level} {msg}"); - } -} - -#[cfg(all(feature = "kernel", not(feature = "std"), not(test)))] -pub struct KernelLog {} - -#[cfg(all(feature = "kernel", not(feature = "std"), not(test)))] -impl LogProvider for KernelLog { - fn log(&self, level: LogLevel, msg: &str) { - use illumos_sys_hdrs as ddi; - - let cmn_level = match level { - LogLevel::Note => ddi::CE_NOTE, - LogLevel::Warn => ddi::CE_WARN, - LogLevel::Error => ddi::CE_WARN, - }; - - let msg_arg = alloc::ffi::CString::new(msg).unwrap(); - unsafe { ddi::cmn_err(cmn_level, msg_arg.as_ptr()) } - } -} - -pub struct ExecCtx { - pub log: Box, -} - -#[cfg(any(feature = 
"engine", test))] -pub(crate) struct ExecCtx2<'a> { - pub user_ctx: &'a ExecCtx, - pub stats: &'a mut StatTree, -} diff --git a/lib/opte/src/provider.rs b/lib/opte/src/provider.rs new file mode 100644 index 00000000..e18ef0f2 --- /dev/null +++ b/lib/opte/src/provider.rs @@ -0,0 +1,87 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2025 Oxide Computer Company + +//! Providers allow opte-core to work in different contexts (in theory) +//! by allowing various implementations of core services to be plugged +//! into the engine. For example, logging and stats can both be done as +//! providers; providing implementations fit for in-kernel execution +//! versus unit testing execution. Ideally we could get to a point +//! where OPTE could also easily be stood up in userland (not that it +//! is explicitly a goal, but only that the flexibility gives us better +//! options for testing or unique production situations). However, this +//! is the type of abstraction that can quickly grow out of control. If +//! it doesn't serve an obvious purpose with at least two obvious +//! implmentations, then it probably doesn't need to be a provider. + +use alloc::boxed::Box; +use core::fmt; +use core::fmt::Display; + +/// The set of all platform-specific providers required by a port. +pub struct Providers { + pub log: Box, +} + +/// A logging provider provides the means to log messages to some +/// destination based on the context in which OPTE is running. +/// +/// For example, in a unit test this could map to `println!`. In the +/// illumos kernel it would map to `cmn_err(9F)`. +/// +/// Logging levels are provided by [`LogLevel`]. These levels will map +/// to the underlying provider with varying degrees of success. +pub trait LogProvider: Send + Sync { + /// Log a message at the specified level. 
+ fn log(&self, level: LogLevel, msg: &str); +} + +#[derive(Clone, Copy, Debug)] +pub enum LogLevel { + Note, + Warn, + Error, +} + +impl Display for LogLevel { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let level_s = match self { + Self::Note => "[NOTE]", + Self::Warn => "[WARN]", + Self::Error => "[ERROR]", + }; + write!(f, "{level_s}") + } +} + +#[cfg(any(feature = "std", test))] +#[derive(Clone, Copy)] +pub struct PrintlnLog; + +#[cfg(any(feature = "std", test))] +impl LogProvider for PrintlnLog { + fn log(&self, level: LogLevel, msg: &str) { + println!("{level} {msg}"); + } +} + +#[cfg(all(feature = "kernel", not(feature = "std"), not(test)))] +pub struct KernelLog; + +#[cfg(all(feature = "kernel", not(feature = "std"), not(test)))] +impl LogProvider for KernelLog { + fn log(&self, level: LogLevel, msg: &str) { + use illumos_sys_hdrs as ddi; + + let cmn_level = match level { + LogLevel::Note => ddi::CE_NOTE, + LogLevel::Warn => ddi::CE_WARN, + LogLevel::Error => ddi::CE_WARN, + }; + + let msg_arg = alloc::ffi::CString::new(msg).unwrap(); + unsafe { ddi::cmn_err(cmn_level, msg_arg.as_ptr()) } + } +} diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 0798b055..17e87427 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -63,7 +63,6 @@ use ingot::geneve::GeneveRef; use ingot::ip::IpProtocol; use ingot::types::HeaderLen; use ingot::udp::Udp; -use opte::ExecCtx; use opte::api::ClearLftReq; use opte::api::ClearUftReq; use opte::api::CmdOk; @@ -108,6 +107,7 @@ use opte::engine::parse::ValidUlp; use opte::engine::port::Port; use opte::engine::port::PortBuilder; use opte::engine::port::ProcessResult; +use opte::provider::Providers; use oxide_vpc::api::AddFwRuleReq; use oxide_vpc::api::AddRouterEntryReq; use oxide_vpc::api::ClearVirt2BoundaryReq; @@ -248,7 +248,7 @@ pub struct xde_underlay_port { struct XdeState { management_lock: TokenLock, - ectx: Arc, + ectx: Arc, vpc_map: Arc, v2b: Arc, devs: ReadOnlyDevMap, @@ -289,7 +289,7 @@ fn get_xde_state() -> 
&'static XdeState { impl XdeState { fn new() -> Self { - let ectx = Arc::new(ExecCtx { log: Box::new(opte::KernelLog {}) }); + let ectx = Arc::new(Providers { log: Box::new(opte::KernelLog) }); let dev_map = Arc::new(KRwLock::new(DevMap::default())); let devs = ReadOnlyDevMap::new(dev_map.clone()); @@ -2089,7 +2089,7 @@ fn new_port( vpc_map: Arc, v2p: Arc, v2b: Arc, - ectx: Arc, + ectx: Arc, dhcp_cfg: &DhcpCfg, ) -> Result>, OpteError> { let cfg = cfg.clone(); From a80fc6f5baa9348add4bedce1e5ce7d49f23f834 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 17 Jun 2025 14:23:21 +0100 Subject: [PATCH 14/37] Aha --- xde/src/xde.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 17e87427..15782122 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -289,7 +289,8 @@ fn get_xde_state() -> &'static XdeState { impl XdeState { fn new() -> Self { - let ectx = Arc::new(Providers { log: Box::new(opte::KernelLog) }); + let ectx = + Arc::new(Providers { log: Box::new(opte::provider::KernelLog) }); let dev_map = Arc::new(KRwLock::new(DevMap::default())); let devs = ReadOnlyDevMap::new(dev_map.clone()); From 55ed983660799fb46f86efaca45260380856f525 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 17 Jun 2025 20:33:22 +0100 Subject: [PATCH 15/37] Start cleanup, thinking about tests. 
--- lib/opte-test-utils/src/lib.rs | 1 - lib/opte/src/engine/layer.rs | 14 +-- lib/opte/src/engine/stat.rs | 193 +++++++++++++++++++++++++++------ 3 files changed, 169 insertions(+), 39 deletions(-) diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index 79ce78c3..5e9e3e37 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -257,7 +257,6 @@ fn oxide_net_builder( v2p: Arc, v2b: Arc, ) -> PortBuilder { - #[allow(clippy::arc_with_non_send_sync)] let ectx = Arc::new(Providers { log: Box::new(opte::provider::PrintlnLog) }); let name_cstr = std::ffi::CString::new(name).unwrap(); diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index bd93dc89..f449cfdb 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -29,7 +29,7 @@ use super::rule::GenBtError; use super::rule::HdrTransformError; use super::rule::Rule; use super::rule::ht_probe; -use super::stat::IntermediateStat; +use super::stat::InternalStat; use super::stat::RootStat; use super::stat::StatTree; use crate::api::DumpLayerResp; @@ -165,7 +165,7 @@ pub enum LftError { #[derive(Clone, Debug)] struct LftInEntry { action_desc: ActionDescEntry, - stat: Arc, + stat: Arc, } impl Display for LftInEntry { @@ -186,7 +186,7 @@ impl Dump for LftInEntry { struct LftOutEntry { in_flow_pair: InnerFlowId, action_desc: ActionDescEntry, - stat: Arc, + stat: Arc, } impl LftOutEntry { @@ -228,9 +228,9 @@ impl LayerFlowTable { action_desc: ActionDescEntry, in_flow: InnerFlowId, out_flow: InnerFlowId, - stat: Arc, + stat: Arc, ) { - // We add unchekced because the limit is now enforced by + // We add unchecked because the limit is now enforced by // LayerFlowTable, not the individual flow tables. let in_entry = LftInEntry { action_desc: action_desc.clone(), stat: stat.clone() }; @@ -369,10 +369,10 @@ enum EntryState { /// No flow entry was found matching a given flowid. None, /// An existing flow table entry was found. 
- Clean(ActionDescEntry, Arc), + Clean(ActionDescEntry, Arc), /// An existing flow table entry was found, but rule processing must be rerun /// to use the original action or invalidate the underlying entry. - Dirty(ActionDescEntry, Arc), + Dirty(ActionDescEntry, Arc), } /// The default action of a layer. diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index e717e07b..f6d95313 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -27,7 +27,8 @@ use opte_api::PacketCounter as ApiPktCounter; use opte_api::TcpState; use uuid::Uuid; -// TODO DELETION ON FLOW CLOSE [and holding onto 'dead flows'] +// TODO READOUT OF STAT FROM GIVEN ROOT(S). +// TODO restrict most of this to pub(crate)? /// Opaque identifier for tracking unique stat objects. #[derive(Copy, Clone, Hash, PartialEq, PartialOrd, Eq, Ord, Debug)] @@ -50,6 +51,8 @@ pub enum Action { Hairpin, } +/// Packet counters and additional information associated with an accepted +/// flow's 5-tuple. pub struct FlowStat { /// The direction of this flow half. pub dir: Direction, @@ -68,26 +71,31 @@ pub struct FlowStat { } impl FlowStat { + /// Record an packet matching this flow and direction. pub fn hit(&self, pkt_size: u64) { self.hit_at(pkt_size, Moment::now()); } + /// Record an packet matching this flow and direction, using + /// an existing timestamp. pub fn hit_at(&self, pkt_size: u64, time: Moment) { self.last_hit.store(time.raw(), Ordering::Relaxed); self.shared.stats.hit(self.dir, pkt_size); } } +/// Packet counters shared by both halves of a flow. Each 5-tuple references +/// this struct through a [`FlowStat`]. pub struct SharedFlowStat { - /// Actual stats associated with this flow. + /// Counters associated with this flow. pub stats: PacketCounter, - /// Tcp? + /// Estimated TCP state from monitoring a flow. /// - /// Yeah this needs some rework wrt today... + /// XXX: TODO pub tcp: Option, - /// パケットはどちらにきましたか。 + /// The direction this flow was opened on. 
pub first_dir: Direction, } @@ -102,10 +110,11 @@ impl From<&FlowStat> for ApiFlowStat { } } +/// Stat objects which can be a parent to a non-root node. #[derive(Clone, Debug)] pub enum StatParent { Root(Arc), - Intermediate(Arc), + Internal(Arc), } impl From> for StatParent { @@ -114,9 +123,9 @@ impl From> for StatParent { } } -impl From> for StatParent { - fn from(value: Arc) -> Self { - Self::Intermediate(value) +impl From> for StatParent { + fn from(value: Arc) -> Self { + Self::Internal(value) } } @@ -124,7 +133,7 @@ impl StatParent { fn parents(&self) -> &[StatParent] { match self { Self::Root(_) => &[], - Self::Intermediate(i) => &i.parents, + Self::Internal(i) => &i.parents, } } @@ -135,14 +144,14 @@ impl StatParent { fn root_id(&self) -> Option<&Uuid> { match self { Self::Root(r) => Some(&r.id), - Self::Intermediate(_) => None, + Self::Internal(_) => None, } } fn inner(&self) -> &TableStat { match self { Self::Root(r) => &r.body, - Self::Intermediate(i) => &i.body, + Self::Internal(i) => &i.body, } } @@ -182,15 +191,16 @@ impl StatParent { } } +/// Stat objects which can be a child to a non-leaf node. #[derive(Clone, Debug)] pub enum StatChild { - Intermediate(Weak), + Internal(Weak), Flow(Weak), } -impl From<&Arc> for StatChild { - fn from(value: &Arc) -> Self { - Self::Intermediate(Arc::downgrade(value)) +impl From<&Arc> for StatChild { + fn from(value: &Arc) -> Self { + Self::Internal(Arc::downgrade(value)) } } @@ -200,27 +210,31 @@ impl From<&Arc> for StatChild { } } +/// Long-lived counters associated with a rule or control-plane relevant +/// object. #[derive(Debug)] pub struct RootStat { pub id: Uuid, body: TableStat, } +/// Temporary counters associated with an LFT entry. #[derive(Debug)] -pub struct IntermediateStat { +pub struct InternalStat { pub parents: Box<[StatParent]>, body: TableStat, } +/// Shared components on non-flow stats. struct TableStat { /// A list of other stat-related objects who name this table /// stat as one of its parents. 
children: KRwLock>, - /// The actual stats! + /// The actual stats stats: FullCounter, - /// When was this flow last updated? + /// When was a hit last recorded? last_hit: AtomicU64, } @@ -262,6 +276,9 @@ impl TableStat { } } +/// Packet count/byte counters. +/// +/// Base component of any counter set in OPTE. pub struct PacketCounter { pub id: StatId, pub created_at: Moment, @@ -325,6 +342,7 @@ impl From<&PacketCounter> for ApiPktCounter { } } +/// Counts of actions taken/packets encountered by a rule. pub struct FullCounter { pub allow: AtomicU64, pub deny: AtomicU64, @@ -369,6 +387,18 @@ impl From<&FullCounter> for ApiFullCounter { } } +impl From<&RootStat> for ApiFullCounter { + fn from(val: &RootStat) -> Self { + (&val.body.stats).into() + } +} + +impl From<&InternalStat> for ApiFullCounter { + fn from(val: &InternalStat) -> Self { + (&val.body.stats).into() + } +} + pub trait FoldStat: Send + Sync { fn fold(&self, into: &FullCounter, visited: &mut BTreeSet); } @@ -381,7 +411,7 @@ impl FoldStat for FlowStat { } } -impl FoldStat for IntermediateStat { +impl FoldStat for InternalStat { fn fold(&self, into: &FullCounter, visited: &mut BTreeSet) { if !visited.insert(self.body.stats.id()) { self.body.stats.combine(into); @@ -397,14 +427,14 @@ impl FoldStat for RootStat { } } -/// Tracking/handling of all stats. +/// Manager of all stat/counter objects within a port. +/// /// -/// ?? Describe? #[derive(Default)] pub struct StatTree { next_id: u64, roots: BTreeMap>, - intermediate: Vec>, + internal: Vec>, flows: BTreeMap>, } @@ -431,11 +461,12 @@ impl StatTree { .clone() } + /// Creates a new internal node from a given set of parents. 
pub fn new_intermediate( &mut self, parents: Vec, - ) -> Arc { - let out = Arc::new(IntermediateStat { + ) -> Arc { + let out = Arc::new(InternalStat { parents: parents.into(), body: TableStat { children: KRwLock::new(vec![]), @@ -448,11 +479,12 @@ impl StatTree { parent.append_child(&out); } - self.intermediate.push(out.clone()); + self.internal.push(out.clone()); out } + /// Gets or creates the flow stat pub fn new_flow( &mut self, flow_id: &InnerFlowId, @@ -502,11 +534,13 @@ impl StatTree { parent.append_child(&out); } - // Proven a miss on flow_id already + // We have proven a miss on flow_id already let _ = self.flows.insert(*flow_id, out.clone()); out } + /// Remove all stat entries which have grown stale, folding packet/decision + /// counters into registered parents. pub fn expire(&mut self, now: Moment) { const EXPIRY_WINDOW: Ttl = Ttl::new_seconds(10); // Root removal and re-entry? Don't want any gaps. @@ -590,8 +624,8 @@ impl StatTree { } } - // Intermediates. - self.intermediate.retain(|v| { + // Internal/branch nodes. + self.internal.retain(|v| { // Time is... not relevant here. The LFTs are GONE. if Arc::strong_count(v) == 1 { for p in &v.parents { @@ -626,7 +660,7 @@ impl StatTree { } out.push_str("----\n\n"); out.push_str("--Ints--\n"); - for root in &self.intermediate { + for root in &self.internal { let d = ApiFullCounter::from(&root.body.stats); out.push_str(&format!("\t{:?} -> {d:?}\n", root.body.stats.id())); let parents: Vec> = @@ -650,6 +684,7 @@ impl StatTree { } } +/// Return the underlying stats of decision-making rules which allowed a flow. fn get_base_ids(parents: &[StatParent]) -> BTreeSet { let mut out = BTreeSet::new(); @@ -664,7 +699,8 @@ fn get_base_ids(parents: &[StatParent]) -> BTreeSet { out } -/// XXX holds stats as they arrive on a packet. +/// Collects stats as a packet is processed, keeping track of the boundary +/// of the most recent layer. 
pub struct FlowStatBuilder { parents: Vec, layer_end: usize, @@ -731,6 +767,7 @@ impl Default for FlowStatBuilder { } } +/// Utility newtype for tracking visited nodes. struct ById(StatParent); impl PartialOrd for ById { @@ -752,3 +789,97 @@ impl PartialEq for ById { } impl Eq for ById {} + +#[cfg(test)] +mod tests { + use super::*; + use crate::api::AddrPair; + use ingot::ip::IpProtocol; + use opte_api::Ipv4Addr; + + const ROOT_0: Uuid = Uuid::from_u64_pair(1234, 0); + const ROOT_1: Uuid = Uuid::from_u64_pair(1234, 1); + const ROOT_2: Uuid = Uuid::from_u64_pair(1234, 2); + const ROOT_3: Uuid = Uuid::from_u64_pair(1234, 3); + + const FLOW_OUT: InnerFlowId = InnerFlowId { + proto: IpProtocol::UDP.0, + addrs: AddrPair::V4 { + src: Ipv4Addr::from_const([10, 0, 0, 1]), + dst: Ipv4Addr::from_const([1, 1, 1, 1]), + }, + proto_info: [12345, 53], + }; + + const FLOW_IN: InnerFlowId = InnerFlowId { + proto: IpProtocol::UDP.0, + addrs: AddrPair::V4 { + dst: Ipv4Addr::from_const([10, 0, 0, 1]), + src: Ipv4Addr::from_const([1, 1, 1, 1]), + }, + proto_info: [53, 12345], + }; + + #[test] + fn flow_stat_deny() { + // Assert that all (non-terminal) layers are counted as an 'accept'. + // All stats in the last layer instead increment the terminal action. 
+ let mut tree = StatTree::default(); + + let r0 = tree.root(Some(ROOT_0)); + let r1 = tree.root(Some(ROOT_1)); + let r2 = tree.root(Some(ROOT_2)); + let r3 = tree.root(Some(ROOT_3)); + + let i0 = tree.new_intermediate(vec![r0.into()]); + let i1 = tree.new_intermediate(vec![r2.into()]); + + let mut fb = FlowStatBuilder::new(); + fb.push(i0.clone()); + fb.new_layer(); + fb.push(r1.clone()); + fb.new_layer(); + fb.push(i1.clone()); + fb.push(r3.clone()); + + assert!( + fb.terminate(Action::Deny, 128, Direction::Out, false).is_none() + ); + let snap_i0: ApiFullCounter = i0.as_ref().into(); + assert_eq!(snap_i0.allow, 1); + assert_eq!(snap_i0.deny, 0); + assert_eq!(snap_i0.packets.pkts_out, 1); + assert_eq!(snap_i0.packets.bytes_out, 128); + + let snap_r1: ApiFullCounter = r1.as_ref().into(); + assert_eq!(snap_i0.allow, 1); + assert_eq!(snap_r1.deny, 0); + assert_eq!(snap_r1.packets.pkts_out, 1); + assert_eq!(snap_r1.packets.bytes_out, 128); + + let snap_i1: ApiFullCounter = i1.as_ref().into(); + assert_eq!(snap_i1.allow, 0); + assert_eq!(snap_i1.deny, 1); + assert_eq!(snap_i1.packets.pkts_out, 1); + assert_eq!(snap_i1.packets.bytes_out, 128); + + let snap_r3: ApiFullCounter = r3.as_ref().into(); + assert_eq!(snap_r3.allow, 0); + assert_eq!(snap_r3.deny, 1); + assert_eq!(snap_r3.packets.pkts_out, 1); + assert_eq!(snap_r3.packets.bytes_out, 128); + + // Does this work with only one layer? 
+ let mut fb = FlowStatBuilder::new(); + fb.push(i0.clone()); + assert!( + fb.terminate(Action::Deny, 64, Direction::Out, false).is_none() + ); + + let snap_i0: ApiFullCounter = i0.as_ref().into(); + assert_eq!(snap_i0.allow, 1); + assert_eq!(snap_i0.deny, 1); + assert_eq!(snap_i0.packets.pkts_out, 2); + assert_eq!(snap_i0.packets.bytes_out, 192); + } +} From cfaa2735ba562c11fae5432597960f8dfed879ba Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 18 Jun 2025 13:26:25 +0100 Subject: [PATCH 16/37] Testing, thinking through various invariants --- crates/opte-api/src/stat.rs | 6 +- lib/opte/src/engine/stat.rs | 266 ++++++++++++++++++++++++++++++------ 2 files changed, 228 insertions(+), 44 deletions(-) diff --git a/crates/opte-api/src/stat.rs b/crates/opte-api/src/stat.rs index 846bf88f..f7f8afa3 100644 --- a/crates/opte-api/src/stat.rs +++ b/crates/opte-api/src/stat.rs @@ -12,7 +12,7 @@ use serde::Deserialize; use serde::Serialize; use uuid::Uuid; -#[derive(Deserialize, Serialize, Debug, Clone)] +#[derive(Deserialize, Serialize, Debug, Clone, Eq, PartialEq)] pub struct FlowStat { pub partner: FlowId, pub dir: Direction, @@ -20,7 +20,7 @@ pub struct FlowStat { pub stats: PacketCounter, } -#[derive(Deserialize, Serialize, Debug, Clone, Copy)] +#[derive(Deserialize, Serialize, Debug, Clone, Copy, Eq, PartialEq)] pub struct PacketCounter { pub created_at: u64, pub pkts_in: u64, @@ -29,7 +29,7 @@ pub struct PacketCounter { pub bytes_out: u64, } -#[derive(Deserialize, Serialize, Debug, Clone, Copy)] +#[derive(Deserialize, Serialize, Debug, Clone, Copy, Eq, PartialEq)] pub struct FullCounter { pub allow: u64, pub deny: u64, diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index f6d95313..10388a08 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -157,19 +157,22 @@ impl StatParent { /// Allow a packet which will track local stats via a UFT entry. 
pub fn allow(&self) { - self.inner().allow(); + self.allow_at(Moment::now()); } /// Allow a packet (at a given timestamp) which will track local stats via /// a UFT entry. pub fn allow_at(&self, time: Moment) { - self.inner().allow_at(time); + if let Self::Root(r) = self { + r.record_hit(time); + } + self.inner().allow(); } /// Record an action for a packet which will ultimately be dropped or /// hairpinned. pub fn act(&self, action: Action, pkt_size: u64, direction: Direction) { - self.inner().act(action, pkt_size, direction); + self.act_at(action, pkt_size, direction, Moment::now()); } /// Record an action for a packet (at a given time) which will ultimately @@ -181,7 +184,10 @@ impl StatParent { direction: Direction, time: Moment, ) { - self.inner().act_at(action, pkt_size, direction, time); + if let Self::Root(r) = self { + r.record_hit(time); + } + self.inner().act(action, pkt_size, direction); } /// Add a weak child reference to this stat object. @@ -210,14 +216,33 @@ impl From<&Arc> for StatChild { } } +impl StatChild { + /// Returns whether any strong references to this child node remain. + fn is_alive(&self) -> bool { + match self { + Self::Internal(i) => i.strong_count() != 0, + Self::Flow(i) => i.strong_count() != 0, + } + } +} + /// Long-lived counters associated with a rule or control-plane relevant /// object. #[derive(Debug)] pub struct RootStat { + /// The control-plane ID associated with these counters. pub id: Uuid, + /// When was a hit last recorded? + pub last_hit: AtomicU64, body: TableStat, } +impl RootStat { + fn record_hit(&self, time: Moment) { + self.last_hit.store(time.raw(), Ordering::Relaxed); + } +} + /// Temporary counters associated with an LFT entry. #[derive(Debug)] pub struct InternalStat { @@ -231,48 +256,32 @@ struct TableStat { /// stat as one of its parents. children: KRwLock>, - /// The actual stats + /// The actual stats. stats: FullCounter, - - /// When was a hit last recorded? 
- last_hit: AtomicU64, } impl core::fmt::Debug for TableStat { - fn fmt(&self, _f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - todo!() + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("TableStat") + .field("children", &"") + .field("stats", &ApiFullCounter::from(&self.stats)) + .finish() } } impl TableStat { fn allow(&self) { - self.allow_at(Moment::now()); - } - - fn allow_at(&self, time: Moment) { - self.last_hit.store(time.raw(), Ordering::Relaxed); self.stats.allow.fetch_add(1, Ordering::Relaxed); } fn act(&self, action: Action, pkt_size: u64, direction: Direction) { - self.act_at(action, pkt_size, direction, Moment::now()); - } - - fn act_at( - &self, - action: Action, - pkt_size: u64, - direction: Direction, - time: Moment, - ) { - self.last_hit.store(time.raw(), Ordering::Relaxed); self.stats.packets.hit(direction, pkt_size); - let stat = match action { + match action { Action::Allow => &self.stats.allow, Action::Deny => &self.stats.deny, Action::Hairpin => &self.stats.hairpin, - }; - stat.fetch_add(1, Ordering::Relaxed); + } + .fetch_add(1, Ordering::Relaxed); } } @@ -361,7 +370,7 @@ impl FullCounter { } fn combine(&self, into: &Self) { - into.packets.combine(&self.packets); + self.packets.combine(&into.packets); into.allow .fetch_add(self.allow.load(Ordering::Relaxed), Ordering::Relaxed); into.deny @@ -451,10 +460,10 @@ impl StatTree { .or_insert_with(|| { Arc::new(RootStat { id: uuid, + last_hit: Moment::now().raw().into(), body: TableStat { children: KRwLock::new(vec![]), stats: FullCounter::from_next_id(ids), - last_hit: Moment::now().raw().into(), }, }) }) @@ -471,7 +480,6 @@ impl StatTree { body: TableStat { children: KRwLock::new(vec![]), stats: FullCounter::from_next_id(&mut self.next_id), - last_hit: Moment::now().raw().into(), }, }); @@ -496,6 +504,16 @@ impl StatTree { // TODO: what to do with (maybe new) parents & bases?! 
// I *think* these should win out, insert, and preserve // the old stats. Need to think about it. + // + // I think what may be needed is a 'last synced' stat set for a + // flow, so that we can save out the delta from that if 'parents' + // changes. E.g.: + // EPOCH 0 -- flow has parents a, b', d + // -- flow exists for ~2min actively + // EPOCH 1 -- firewall rule change occurs + // -- flow *now* has parents a, c, d + // -- flow closes, but b' and c should receive the packet + // byte/counts split at the epoch 0->1 transition. return e.get().clone(); } @@ -560,7 +578,12 @@ impl StatTree { rhs: Liveness, } - // Flows -- need to account for shared component between arc'd things. + // + // Flows -- we need to account for shared component between arc'd halves + // of each, hence the liveness tracking. At a high level, we can expire + // a flow if one half exists (but is stale), or both halves exist and + // *both* are stale. + // let mut possibly_expired: BTreeMap = BTreeMap::new(); for (k, v) in &self.flows { let t_hit = @@ -584,6 +607,7 @@ impl StatTree { } } } + for v in possibly_expired.values() { let cannot_remove = v.lhs == Liveness::SeenKeep || v.rhs == Liveness::SeenKeep @@ -624,9 +648,13 @@ impl StatTree { } } + // // Internal/branch nodes. + // self.internal.retain(|v| { - // Time is... not relevant here. The LFTs are GONE. + // Internal nodes do not have/use a last_hit time, as their + // lifetimes are tied exclusively to LFT entries (we do not + // re-query them, either). if Arc::strong_count(v) == 1 { for p in &v.parents { v.body.stats.combine(&p.inner().stats); @@ -637,14 +665,28 @@ impl StatTree { } }); + // // Roots may need to be held onto for some time in case rules with the // same ID come and go in adjacent control plane operations... 
+ // self.roots.retain(|_, v| { let t_hit = - Moment::from_raw_nanos(v.body.last_hit.load(Ordering::Relaxed)); + Moment::from_raw_nanos(v.last_hit.load(Ordering::Relaxed)); Arc::strong_count(v) > 1 || !ROOT_EXPIRY_WINDOW.is_expired(t_hit, now) }); + + // + // Reap any child references. + // + self.internal.iter().for_each(|el| { + let mut children = el.body.children.write(); + children.retain(|c| c.is_alive()); + }); + self.roots.values().for_each(|el| { + let mut children = el.body.children.write(); + children.retain(|c| c.is_alive()); + }); } // TEMP @@ -701,6 +743,9 @@ fn get_base_ids(parents: &[StatParent]) -> BTreeSet { /// Collects stats as a packet is processed, keeping track of the boundary /// of the most recent layer. +/// +/// TODO: there are soundness rules to prevent double-counting if different +/// expiries occur at different times. Codify these. pub struct FlowStatBuilder { parents: Vec, layer_end: usize, @@ -733,27 +778,28 @@ impl FlowStatBuilder { direction: Direction, create_flow: bool, ) -> Option> { + let now = Moment::now(); match action { Action::Allow if create_flow => { - self.parents.iter().for_each(|v| v.allow()); + self.parents.iter().for_each(|v| v.allow_at(now)); // TODO: should *take*? 
Some(self.parents.clone()) } Action::Allow => { self.parents .iter() - .for_each(|v| v.act(action, pkt_size, direction)); + .for_each(|v| v.act_at(action, pkt_size, direction, now)); None } Action::Deny | Action::Hairpin => { let (accepted, last_layer) = self.parents.split_at(self.layer_end); - accepted - .iter() - .for_each(|v| v.act(Action::Allow, pkt_size, direction)); + accepted.iter().for_each(|v| { + v.act_at(Action::Allow, pkt_size, direction, now) + }); last_layer .iter() - .for_each(|v| v.act(action, pkt_size, direction)); + .for_each(|v| v.act_at(action, pkt_size, direction, now)); None } @@ -792,6 +838,8 @@ impl Eq for ById {} #[cfg(test)] mod tests { + use core::time::Duration; + use super::*; use crate::api::AddrPair; use ingot::ip::IpProtocol; @@ -882,4 +930,140 @@ mod tests { assert_eq!(snap_i0.packets.pkts_out, 2); assert_eq!(snap_i0.packets.bytes_out, 192); } + + #[test] + fn flow_lifecycle() { + let mut tree = StatTree::default(); + + let r0 = tree.root(Some(ROOT_0)); + let r1 = tree.root(Some(ROOT_1)); + let r2 = tree.root(Some(ROOT_2)); + let r3 = tree.root(Some(ROOT_3)); + + let i0 = tree.new_intermediate(vec![r0.clone().into()]); + let i1 = tree.new_intermediate(vec![r1.clone().into()]); + + let p_sz = 64; + let f_out = { + let mut fb = FlowStatBuilder::new(); + fb.push(i0.clone()); + fb.push(r3.clone()); + tree.new_flow( + &FLOW_OUT, + &FLOW_IN, + Direction::Out, + fb.terminate(Action::Allow, p_sz, Direction::Out, true) + .unwrap(), + ) + }; + f_out.hit(p_sz); + + let f_in = { + let mut fb = FlowStatBuilder::new(); + fb.push(i0.clone()); + fb.push(i1.clone()); + fb.push(r2.clone()); + tree.new_flow( + &FLOW_IN, + &FLOW_OUT, + Direction::In, + fb.terminate(Action::Allow, p_sz, Direction::In, true).unwrap(), + ) + }; + f_in.hit(p_sz); + + // These should refer to the same block of packet counters. + assert!(Arc::ptr_eq(&f_out.shared, &f_in.shared)); + + // Suppose some more packets come in 5 seconds later. 
+ let t_0 = Moment::now() + Duration::from_secs(5); + f_in.hit_at(150, t_0); + f_in.hit_at(100, t_0); + f_in.hit_at(230, t_0); + + // The UFT has been cleared out -- eviction, protocol finish, etc. + drop(f_in); + drop(f_out); + + // Perform expiry. Suppose we're doing so just after that update, + // then nothing should change. + let t_1 = t_0 + Duration::from_secs(1); + tree.expire(t_1); + assert!(tree.flows.contains_key(&FLOW_IN)); + assert!(tree.flows.contains_key(&FLOW_OUT)); + assert_eq!(tree.internal.len(), 2); + + // Both halves of a flow must be stale for expiry to proceed. + tree.expire(t_1 + Duration::from_secs(5)); + assert!(tree.flows.contains_key(&FLOW_IN)); + assert!(tree.flows.contains_key(&FLOW_OUT)); + assert_eq!(tree.internal.len(), 2); + + // Perform an expiry for real. Suppose that the LFT i1 has been removed + // from its layer table -- its stats will have been given to r1. + let t_2 = t_1 + Duration::from_secs(10); + drop(i1); + tree.expire(t_2); + assert!(!tree.flows.contains_key(&FLOW_IN)); + assert!(!tree.flows.contains_key(&FLOW_OUT)); + assert_eq!(tree.internal.len(), 1); + + let r0c = ApiFullCounter::from(r0.as_ref()); + assert_eq!(r0c.allow, 0); + assert_eq!(r0c.packets.pkts_in, 0); + assert_eq!(r0c.packets.pkts_out, 0); + assert_eq!(r0c.packets.bytes_in, 0); + assert_eq!(r0c.packets.bytes_out, 0); + + let i0c = ApiFullCounter::from(i0.as_ref()); + assert_eq!(i0c.allow, 2); + assert_eq!(i0c.packets.pkts_in, 4); + assert_eq!(i0c.packets.pkts_out, 1); + assert_eq!(i0c.packets.bytes_in, 544); + assert_eq!(i0c.packets.bytes_out, 64); + + for el in [ + ApiFullCounter::from(r1.as_ref()), + ApiFullCounter::from(r2.as_ref()), + ApiFullCounter::from(r3.as_ref()), + ] { + assert_eq!(el.allow, 1); + assert_eq!(el.packets.pkts_in, 4); + assert_eq!(el.packets.pkts_out, 1); + assert_eq!(el.packets.bytes_in, 544); + assert_eq!(el.packets.bytes_out, 64); + } + + // Now the LFT entry bound to r0 has gone away, and some other flows + // have written into 
the root stat. Expect that i0's stats have been + // folded into it. + let t_3 = t_2 + Duration::from_secs(10); + drop(i0); + r0.body.act(Action::Allow, 1001, Direction::In); + r0.body.act(Action::Allow, 1002, Direction::Out); + r0.body.act(Action::Deny, 64, Direction::Out); + r0.body.act(Action::Deny, 129, Direction::In); + r0.body.act(Action::Hairpin, 32, Direction::Out); + tree.expire(t_3); + + let r0c = ApiFullCounter::from(r0.as_ref()); + assert_eq!(r0c.allow, 4); + assert_eq!(r0c.deny, 2); + assert_eq!(r0c.hairpin, 1); + assert_eq!(r0c.packets.pkts_in, 6); + assert_eq!(r0c.packets.pkts_out, 4); + assert_eq!(r0c.packets.bytes_in, 1674); + assert_eq!(r0c.packets.bytes_out, 1162); + + // Children should be empty on all roots. + for el in [r0, r1, r2, r3] { + let children = el.body.children.read(); + assert!(children.is_empty()); + } + } + + #[test] + fn root_counters() { + todo!() + } } From a71a69a169fc8eed6304cad95161e30dd91a021b Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 18 Jun 2025 16:30:29 +0100 Subject: [PATCH 17/37] Stats as observed from `Root`s Now we have the primitives for flow and rule stats. Let's go! 
--- lib/opte/src/engine/stat.rs | 243 +++++++++++++++++++++++++++++++----- 1 file changed, 213 insertions(+), 30 deletions(-) diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index 10388a08..8f98bc6e 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -221,7 +221,35 @@ impl StatChild { fn is_alive(&self) -> bool { match self { Self::Internal(i) => i.strong_count() != 0, - Self::Flow(i) => i.strong_count() != 0, + Self::Flow(f) => f.strong_count() != 0, + } + } + + fn upgrade(&self) -> Option { + match self { + Self::Internal(i) => i.upgrade().map(StrongStatChild::Internal), + Self::Flow(f) => f.upgrade().map(StrongStatChild::Flow), + } + } +} + +enum StrongStatChild { + Internal(Arc), + Flow(Arc), +} + +impl StrongStatChild { + fn global_id(&self) -> StatId { + match self { + Self::Internal(i) => i.body.stats.id(), + Self::Flow(f) => f.shared.stats.id, + } + } + + fn combine_api(&self, into: &mut ApiFullCounter) { + match self { + Self::Internal(i) => i.body.stats.combine_api(into), + Self::Flow(f) => f.shared.stats.combine_api(&mut into.packets), } } } @@ -241,6 +269,33 @@ impl RootStat { fn record_hit(&self, time: Moment) { self.last_hit.store(time.raw(), Ordering::Relaxed); } + + fn combined_stats(&self) -> ApiFullCounter { + let mut visited = BTreeSet::new(); + + let mut scratch = ApiFullCounter::from(&self.body.stats); + let mut to_visit = { + let children = self.body.children.read(); + children.clone() + }; + + while let Some(node) = to_visit.pop() { + let Some(inode) = node.upgrade() else { continue }; + let id = inode.global_id(); + if !visited.insert(id) { + continue; + } + + inode.combine_api(&mut scratch); + + if let StrongStatChild::Internal(i) = inode { + let children = i.body.children.read(); + to_visit.extend_from_slice(&children); + } + } + + scratch + } } /// Temporary counters associated with an LFT entry. 
@@ -337,6 +392,13 @@ impl PacketCounter { Ordering::Relaxed, ); } + + fn combine_api(&self, into: &mut ApiPktCounter) { + into.pkts_in += self.pkts_in.load(Ordering::Relaxed); + into.bytes_in += self.bytes_in.load(Ordering::Relaxed); + into.pkts_out += self.pkts_out.load(Ordering::Relaxed); + into.bytes_out += self.bytes_out.load(Ordering::Relaxed); + } } impl From<&PacketCounter> for ApiPktCounter { @@ -379,6 +441,13 @@ impl FullCounter { .fetch_add(self.hairpin.load(Ordering::Relaxed), Ordering::Relaxed); } + fn combine_api(&self, into: &mut ApiFullCounter) { + self.packets.combine_api(&mut into.packets); + into.allow += self.allow.load(Ordering::Relaxed); + into.deny += self.deny.load(Ordering::Relaxed); + into.hairpin += self.hairpin.load(Ordering::Relaxed); + } + #[inline] fn id(&self) -> StatId { self.packets.id @@ -408,34 +477,6 @@ impl From<&InternalStat> for ApiFullCounter { } } -pub trait FoldStat: Send + Sync { - fn fold(&self, into: &FullCounter, visited: &mut BTreeSet); -} - -impl FoldStat for FlowStat { - fn fold(&self, into: &FullCounter, visited: &mut BTreeSet) { - if !visited.insert(self.shared.stats.id) { - self.shared.stats.combine(&into.packets); - } - } -} - -impl FoldStat for InternalStat { - fn fold(&self, into: &FullCounter, visited: &mut BTreeSet) { - if !visited.insert(self.body.stats.id()) { - self.body.stats.combine(into); - } - } -} - -impl FoldStat for RootStat { - fn fold(&self, into: &FullCounter, visited: &mut BTreeSet) { - if !visited.insert(self.body.stats.id()) { - self.body.stats.combine(into); - } - } -} - /// Manager of all stat/counter objects within a port. /// /// @@ -689,6 +730,24 @@ impl StatTree { }); } + /// Return a snapshot of collated stats for a given root. + /// + /// This will include the values of all downstream children, + /// but may be susceptible to partial reads between individual counters. 
+ pub fn root_stat(&self, id: &Uuid) -> Option { + self.roots.get(id).map(|v| RootStat::combined_stats(v)) + } + + /// Return a snapshot of collated stats for all present roots. + /// + /// This will include the values of all downstream children, + /// but may be susceptible to partial reads between individual counters. + pub fn all_root_stats( + &self, + ) -> impl Iterator { + self.roots.iter().map(|(k, v)| (k, v.combined_stats())) + } + // TEMP pub fn dump(&self) -> String { let mut out = String::new(); @@ -868,6 +927,24 @@ mod tests { proto_info: [53, 12345], }; + const FLOW_OUT_2: InnerFlowId = InnerFlowId { + proto: IpProtocol::TCP.0, + addrs: AddrPair::V4 { + src: Ipv4Addr::from_const([10, 0, 0, 1]), + dst: Ipv4Addr::from_const([1, 1, 1, 1]), + }, + proto_info: [23456, 80], + }; + + const FLOW_IN_2: InnerFlowId = InnerFlowId { + proto: IpProtocol::TCP.0, + addrs: AddrPair::V4 { + dst: Ipv4Addr::from_const([10, 0, 0, 1]), + src: Ipv4Addr::from_const([1, 1, 1, 1]), + }, + proto_info: [80, 23456], + }; + #[test] fn flow_stat_deny() { // Assert that all (non-terminal) layers are counted as an 'accept'. @@ -957,6 +1034,7 @@ mod tests { ) }; f_out.hit(p_sz); + assert_eq!(f_out.bases, vec![r0.id, r3.id].into_iter().collect()); let f_in = { let mut fb = FlowStatBuilder::new(); @@ -971,6 +1049,7 @@ mod tests { ) }; f_in.hit(p_sz); + assert_eq!(f_in.bases, vec![r0.id, r1.id, r2.id].into_iter().collect()); // These should refer to the same block of packet counters. 
assert!(Arc::ptr_eq(&f_out.shared, &f_in.shared)); @@ -1064,6 +1143,110 @@ mod tests { #[test] fn root_counters() { - todo!() + let mut tree = StatTree::default(); + + let r0 = tree.root(Some(ROOT_0)); + let r1 = tree.root(Some(ROOT_1)); + let r2 = tree.root(Some(ROOT_2)); + let r3 = tree.root(Some(ROOT_3)); + + let i0 = tree.new_intermediate(vec![r0.clone().into()]); + let i1 = tree.new_intermediate(vec![r1.clone().into()]); + + let f0_out = { + let mut fb = FlowStatBuilder::new(); + fb.push(i0.clone()); + tree.new_flow( + &FLOW_OUT, + &FLOW_IN, + Direction::Out, + fb.terminate(Action::Allow, 72, Direction::Out, true).unwrap(), + ) + }; + f0_out.hit(72); + + let f0_in = { + let mut fb = FlowStatBuilder::new(); + fb.push(i0.clone()); + fb.push(i1.clone()); + fb.push(r2.clone()); + tree.new_flow( + &FLOW_IN, + &FLOW_OUT, + Direction::In, + fb.terminate(Action::Allow, 72, Direction::In, true).unwrap(), + ) + }; + f0_in.hit(72); + + let f1_out = { + let mut fb = FlowStatBuilder::new(); + fb.push(i0.clone()); + fb.push(r2.clone()); + fb.push(r3.clone()); + tree.new_flow( + &FLOW_OUT_2, + &FLOW_IN_2, + Direction::Out, + fb.terminate(Action::Allow, 72, Direction::Out, true).unwrap(), + ) + }; + f1_out.hit(72); + + let t0 = Moment::now(); + let t1 = t0 + Duration::from_secs(7); + + f0_out.hit(72); + f0_out.hit(72); + f0_out.hit(1500); + f0_out.hit(1500); + f0_out.hit(1500); + + f0_in.hit(72); + f0_in.hit(60); + f0_in.hit(60); + f0_in.hit(60); + + f1_out.hit_at(60, t1); + f1_out.hit_at(60, t1); + f1_out.hit_at(60, t1); + + drop(i0); + drop(i1); + + // Verify that flow stats remain correct as flows/internal nodes + // are expired. 
+ for i in 0..=15 { + let checkpoint = t1 + Duration::from_secs(i); + tree.expire(checkpoint); + + let r0_s = tree.root_stat(&ROOT_0).unwrap(); + assert_eq!(r0_s.allow, 3, "t={i}"); + assert_eq!(r0_s.packets.pkts_out, 10, "t={i}"); + assert_eq!(r0_s.packets.bytes_out, 4968, "t={i}"); + assert_eq!(r0_s.packets.pkts_in, 5, "t={i}"); + assert_eq!(r0_s.packets.bytes_in, 324, "t={i}"); + + let r1_s = tree.root_stat(&ROOT_1).unwrap(); + assert_eq!(r1_s.allow, 1, "t={i}"); + assert_eq!(r1_s.packets.pkts_out, 6, "t={i}"); + assert_eq!(r1_s.packets.bytes_out, 4716, "t={i}"); + assert_eq!(r1_s.packets.pkts_in, 5, "t={i}"); + assert_eq!(r1_s.packets.bytes_in, 324, "t={i}"); + + let r2_s = tree.root_stat(&ROOT_2).unwrap(); + assert_eq!(r2_s.allow, 2, "t={i}"); + assert_eq!(r2_s.packets.pkts_out, 10, "t={i}"); + assert_eq!(r2_s.packets.bytes_out, 4968, "t={i}"); + assert_eq!(r2_s.packets.pkts_in, 5, "t={i}"); + assert_eq!(r2_s.packets.bytes_in, 324, "t={i}"); + + let r3_s = tree.root_stat(&ROOT_3).unwrap(); + assert_eq!(r3_s.allow, 1, "t={i}"); + assert_eq!(r3_s.packets.pkts_out, 4, "t={i}"); + assert_eq!(r3_s.packets.bytes_out, 252, "t={i}"); + assert_eq!(r3_s.packets.pkts_in, 0, "t={i}"); + assert_eq!(r3_s.packets.bytes_in, 0, "t={i}"); + } } } From 1ceb9b1b667c828d1797066c00c252bbdc398621 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 18 Jun 2025 16:43:31 +0100 Subject: [PATCH 18/37] Tweaks --- lib/opte/src/engine/stat.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index 8f98bc6e..305f75e2 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -553,8 +553,9 @@ impl StatTree { // -- flow exists for ~2min actively // EPOCH 1 -- firewall rule change occurs // -- flow *now* has parents a, c, d - // -- flow closes, but b' and c should receive the packet - // byte/counts split at the epoch 0->1 transition. 
+ // -- flow closes + // In the above example, b' and c should receive the packet + // byte/counts split at the epoch 0->1 transition. return e.get().clone(); } @@ -897,10 +898,9 @@ impl Eq for ById {} #[cfg(test)] mod tests { - use core::time::Duration; - use super::*; use crate::api::AddrPair; + use core::time::Duration; use ingot::ip::IpProtocol; use opte_api::Ipv4Addr; From c0f90130b91c9a491ec945190425c20aec0ef1b6 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 20 Jun 2025 20:16:49 +0100 Subject: [PATCH 19/37] Add optional stat ID to fw rules/routes --- bench/src/packet.rs | 21 +- bin/opteadm/src/bin/opteadm.rs | 10 +- lib/opte-test-utils/src/lib.rs | 12 +- lib/oxide-vpc/src/api/mod.rs | 21 +- lib/oxide-vpc/src/engine/firewall.rs | 52 +++-- lib/oxide-vpc/src/engine/router.rs | 32 +-- lib/oxide-vpc/tests/firewall_tests.rs | 8 +- lib/oxide-vpc/tests/integration_tests.rs | 271 +++++++++++++++-------- xde-tests/src/lib.rs | 11 +- xde/src/xde.rs | 10 +- 10 files changed, 276 insertions(+), 172 deletions(-) diff --git a/bench/src/packet.rs b/bench/src/packet.rs index b555f946..9054ce66 100644 --- a/bench/src/packet.rs +++ b/bench/src/packet.rs @@ -26,6 +26,7 @@ use opte_test_utils::icmp::gen_icmp_echo; use opte_test_utils::icmp::gen_icmpv6_echo; use opte_test_utils::icmp::generate_ndisc; use opte_test_utils::*; +use oxide_vpc::api::Route; pub type TestCase = (MsgBlk, Direction); @@ -294,18 +295,24 @@ impl BenchPacketInstance for UlpProcessInstance { router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); router::add_entry( &g1.port, - IpCidr::Ip6("::/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: 
IpCidr::Ip6("::/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -314,7 +321,7 @@ impl BenchPacketInstance for UlpProcessInstance { let any_in = "dir=in action=allow priority=1000 protocol=any"; firewall::set_fw_rules( &g1.port, - &SetFwRulesReq { + SetFwRulesReq { port_name: g1.port.name().to_string(), rules: vec![any_in.parse().unwrap()], }, diff --git a/bin/opteadm/src/bin/opteadm.rs b/bin/opteadm/src/bin/opteadm.rs index 8aad5f28..6372a5a5 100644 --- a/bin/opteadm/src/bin/opteadm.rs +++ b/bin/opteadm/src/bin/opteadm.rs @@ -46,6 +46,7 @@ use oxide_vpc::api::Ports; use oxide_vpc::api::ProtoFilter; use oxide_vpc::api::RemFwRuleReq; use oxide_vpc::api::RemoveCidrResp; +use oxide_vpc::api::Route; use oxide_vpc::api::RouterClass; use oxide_vpc::api::RouterTarget; use oxide_vpc::api::SNat4Cfg; @@ -640,6 +641,7 @@ fn main() -> anyhow::Result<()> { filters: filters.into(), action, priority, + stat_id: None, }; hdl.add_firewall_rule(&AddFwRuleReq { port_name: port, rule })?; } @@ -775,16 +777,16 @@ fn main() -> anyhow::Result<()> { Command::AddRouterEntry { route: RouterRule { port, dest, target, class }, } => { - let req = - AddRouterEntryReq { port_name: port, dest, target, class }; + let route = Route { dest, target, class, stat_id: None }; + let req = AddRouterEntryReq { port_name: port, route }; hdl.add_router_entry(&req)?; } Command::DelRouterEntry { route: RouterRule { port, dest, target, class }, } => { - let req = - DelRouterEntryReq { port_name: port, dest, target, class }; + let route = Route { dest, target, class, stat_id: None }; + let req = DelRouterEntryReq { port_name: port, route }; if let DelRouterEntryResp::NotFound = hdl.del_router_entry(&req)? 
{ anyhow::bail!( "could not delete entry -- no matching rule found" diff --git a/lib/opte-test-utils/src/lib.rs b/lib/opte-test-utils/src/lib.rs index 5e9e3e37..9ab989fd 100644 --- a/lib/opte-test-utils/src/lib.rs +++ b/lib/opte-test-utils/src/lib.rs @@ -72,6 +72,7 @@ pub use oxide_vpc::api::IpCfg; pub use oxide_vpc::api::Ipv4Cfg; pub use oxide_vpc::api::Ipv6Cfg; pub use oxide_vpc::api::PhysNet; +use oxide_vpc::api::Route; pub use oxide_vpc::api::RouterClass; pub use oxide_vpc::api::RouterTarget; pub use oxide_vpc::api::SNat4Cfg; @@ -372,9 +373,12 @@ pub fn oxide_net_setup2( // on same subnet. router::add_entry( &port, - IpCidr::Ip4(cfg.ipv4().vpc_subnet), - RouterTarget::VpcSubnet(IpCidr::Ip4(cfg.ipv4().vpc_subnet)), - RouterClass::System, + Route { + dest: IpCidr::Ip4(cfg.ipv4().vpc_subnet), + target: RouterTarget::VpcSubnet(IpCidr::Ip4(cfg.ipv4().vpc_subnet)), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); @@ -457,7 +461,7 @@ fn set_default_fw_rules(pav: &mut PortAndVps, cfg: &VpcCfg) { format!("dir=in action=allow priority=65534 hosts=vni={}", cfg.vni,); firewall::set_fw_rules( &pav.port, - &SetFwRulesReq { + SetFwRulesReq { port_name: pav.port.name().to_string(), rules: vec![ vpc_in.parse().unwrap(), diff --git a/lib/oxide-vpc/src/api/mod.rs b/lib/oxide-vpc/src/api/mod.rs index 520499e4..f4aac8f0 100644 --- a/lib/oxide-vpc/src/api/mod.rs +++ b/lib/oxide-vpc/src/api/mod.rs @@ -581,14 +581,20 @@ pub struct ClearVirt2BoundaryReq { pub tep: Vec, } +#[derive(Copy, Clone, Debug, Deserialize, Serialize)] +pub struct Route { + pub dest: IpCidr, + pub target: RouterTarget, + pub class: RouterClass, + pub stat_id: Option, +} + /// Add an entry to the router. Addresses may be either IPv4 or IPv6, though the /// destination and target must match in protocol version. 
#[derive(Clone, Debug, Deserialize, Serialize)] pub struct AddRouterEntryReq { pub port_name: String, - pub dest: IpCidr, - pub target: RouterTarget, - pub class: RouterClass, + pub route: Route, } /// Remove an entry to the router. Addresses may be either IPv4 or IPv6, though the @@ -596,9 +602,7 @@ pub struct AddRouterEntryReq { #[derive(Clone, Debug, Deserialize, Serialize)] pub struct DelRouterEntryReq { pub port_name: String, - pub dest: IpCidr, - pub target: RouterTarget, - pub class: RouterClass, + pub route: Route, } #[derive(Clone, Debug, Deserialize, Serialize)] @@ -623,7 +627,7 @@ pub struct AddFwRuleReq { pub rule: FirewallRule, } -#[derive(Debug, Deserialize, Serialize)] +#[derive(Clone, Debug, Deserialize, Serialize)] pub struct SetFwRulesReq { pub port_name: String, pub rules: Vec, @@ -642,6 +646,7 @@ pub struct FirewallRule { pub filters: Filters, pub action: FirewallAction, pub priority: u16, + pub stat_id: Option, } // TEMP @@ -724,10 +729,10 @@ impl FromStr for FirewallRule { Ok(FirewallRule { direction: direction.unwrap(), - // target.unwrap(), filters, action: action.unwrap(), priority: priority.unwrap(), + stat_id: None, }) } } diff --git a/lib/oxide-vpc/src/engine/firewall.rs b/lib/oxide-vpc/src/engine/firewall.rs index b2740654..7a1222ba 100644 --- a/lib/oxide-vpc/src/engine/firewall.rs +++ b/lib/oxide-vpc/src/engine/firewall.rs @@ -75,42 +75,33 @@ pub fn setup( pub fn add_fw_rule( port: &Port, - req: &AddFwRuleReq, + req: AddFwRuleReq, ) -> Result<(), OpteError> { - let action = match req.rule.action { - FirewallAction::Allow => Action::StatefulAllow, - FirewallAction::Deny => Action::Deny, - }; - - let rule = from_fw_rule(req.rule.clone(), action); - port.add_rule(FW_LAYER_NAME, req.rule.direction, rule) + let dir = req.rule.direction; + let rule = from_fw_rule(req.rule); + port.add_rule(FW_LAYER_NAME, dir, rule) } pub fn rem_fw_rule( port: &Port, - req: &RemFwRuleReq, + req: RemFwRuleReq, ) -> Result<(), OpteError> { 
port.remove_rule(FW_LAYER_NAME, req.dir, req.id) } pub fn set_fw_rules( port: &Port, - req: &SetFwRulesReq, + req: SetFwRulesReq, ) -> Result<(), OpteError> { let mut in_rules = vec![]; let mut out_rules = vec![]; - for fwr in &req.rules { - let action = match fwr.action { - FirewallAction::Allow => Action::StatefulAllow, - FirewallAction::Deny => Action::Deny, - }; - - let rule = from_fw_rule(fwr.clone(), action); - if fwr.direction == Direction::In { - in_rules.push(rule); - } else { - out_rules.push(rule); + for fwr in req.rules { + let dir = fwr.direction; + let rule = from_fw_rule(fwr); + match dir { + Direction::In => in_rules.push(rule), + Direction::Out => out_rules.push(rule), } } @@ -119,16 +110,23 @@ pub fn set_fw_rules( pub struct Firewall {} -pub fn from_fw_rule(fw_rule: FirewallRule, action: Action) -> Rule { - let addr_pred = fw_rule.filters.hosts().into_predicate(fw_rule.direction); - let proto_preds = fw_rule.filters.protocol().into_predicates(); - let port_pred = fw_rule.filters.ports().into_predicate(); +pub fn from_fw_rule(fw_rule: FirewallRule) -> Rule { + let FirewallRule { direction, filters, action, priority, stat_id } = + fw_rule; + + let action = match action { + FirewallAction::Allow => Action::StatefulAllow, + FirewallAction::Deny => Action::Deny, + }; + let addr_pred = filters.hosts().into_predicate(direction); + let proto_preds = filters.protocol().into_predicates(); + let port_pred = filters.ports().into_predicate(); if addr_pred.is_none() && proto_preds.is_empty() && port_pred.is_none() { - return Rule::match_any(fw_rule.priority, action); + return Rule::match_any_with_id(priority, action, stat_id); } - let mut rule = Rule::new(fw_rule.priority, action); + let mut rule = Rule::new_with_id(priority, action, stat_id); rule.add_predicates(proto_preds); diff --git a/lib/oxide-vpc/src/engine/router.rs b/lib/oxide-vpc/src/engine/router.rs index d93594c3..c9e8ed0c 100644 --- a/lib/oxide-vpc/src/engine/router.rs +++ 
b/lib/oxide-vpc/src/engine/router.rs @@ -11,6 +11,7 @@ use super::VpcNetwork; use super::firewall as fw; use crate::api::DelRouterEntryResp; +use crate::api::Route; use crate::api::RouterClass; use crate::api::RouterTarget; use crate::api::stat::*; @@ -286,11 +287,8 @@ fn valid_router_dest_target_pair(dest: &IpCidr, target: &RouterTarget) -> bool { ) } -fn make_rule( - dest: IpCidr, - target: RouterTarget, - class: RouterClass, -) -> Result, OpteError> { +fn make_rule(route: Route) -> Result, OpteError> { + let Route { dest, target, class, stat_id } = route; if !valid_router_dest_target_pair(&dest, &target) { return Err(OpteError::InvalidRouterEntry { dest, @@ -362,7 +360,7 @@ fn make_rule( }; let priority = compute_rule_priority(&dest, class); - let mut rule = Rule::new(priority, action); + let mut rule = Rule::new_with_id(priority, action, stat_id); rule.add_predicate(predicate); Ok(rule.finalize()) @@ -374,11 +372,9 @@ fn make_rule( /// destination [`IpCidr`] as well as its paired [`RouterTarget`]. pub fn del_entry( port: &Port, - dest: IpCidr, - target: RouterTarget, - class: RouterClass, + route: Route, ) -> Result { - let rule = make_rule(dest, target, class)?; + let rule = make_rule(route)?; let maybe_id = port.find_rule(ROUTER_LAYER_NAME, Direction::Out, &rule)?; match maybe_id { Some(id) => { @@ -395,11 +391,9 @@ pub fn del_entry( /// Route the [`IpCidr`] to the specified [`RouterTarget`]. pub fn add_entry( port: &Port, - dest: IpCidr, - target: RouterTarget, - class: RouterClass, + route: Route, ) -> Result { - let rule = make_rule(dest, target, class)?; + let rule = make_rule(route)?; port.add_rule(ROUTER_LAYER_NAME, Direction::Out, rule)?; Ok(NoResp::default()) } @@ -407,14 +401,12 @@ pub fn add_entry( /// Replace the current set of router entries with the set passed in. 
pub fn replace( port: &Port, - entries: Vec<(IpCidr, RouterTarget, RouterClass)>, + entries: &[Route], ) -> Result { - let mut out_rules = Vec::with_capacity(entries.len()); - for (cidr, target, class) in entries { - out_rules.push(make_rule(cidr, target, class)?); - } + let out_rules: Result, _> = + entries.iter().copied().map(make_rule).collect(); - port.set_rules(ROUTER_LAYER_NAME, vec![], out_rules)?; + port.set_rules(ROUTER_LAYER_NAME, vec![], out_rules?)?; Ok(NoResp::default()) } diff --git a/lib/oxide-vpc/tests/firewall_tests.rs b/lib/oxide-vpc/tests/firewall_tests.rs index 0be752fe..c4918a3a 100644 --- a/lib/oxide-vpc/tests/firewall_tests.rs +++ b/lib/oxide-vpc/tests/firewall_tests.rs @@ -27,7 +27,7 @@ fn firewall_replace_rules() { let rule = "dir=in action=allow priority=10 protocol=TCP"; firewall::add_fw_rule( &g2.port, - &AddFwRuleReq { + AddFwRuleReq { port_name: g2.port.name().to_string(), rule: rule.parse().unwrap(), }, @@ -64,7 +64,7 @@ fn firewall_replace_rules() { let tcp_out = "dir=out action=allow priority=1000 protocol=TCP"; firewall::set_fw_rules( &g1.port, - &SetFwRulesReq { + SetFwRulesReq { port_name: g1.port.name().to_string(), rules: vec![any_out.parse().unwrap(), tcp_out.parse().unwrap()], }, @@ -124,7 +124,7 @@ fn firewall_replace_rules() { let new_rule = "dir=in action=deny priority=1000 protocol=TCP"; firewall::set_fw_rules( &g2.port, - &SetFwRulesReq { + SetFwRulesReq { port_name: g2.port.name().to_string(), rules: vec![new_rule.parse().unwrap()], }, @@ -282,7 +282,7 @@ fn firewall_vni_outbound() { format!("dir=out action=allow priority=1000 hosts=vni={}", g1_cfg.vni); firewall::set_fw_rules( &g1.port, - &SetFwRulesReq { + SetFwRulesReq { port_name: g1.port.name().to_string(), rules: vec![any_out.parse().unwrap(), vni_out.parse().unwrap()], }, diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index 1d1e9726..2dec9d1f 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ 
b/lib/oxide-vpc/tests/integration_tests.rs @@ -57,6 +57,7 @@ use opte_test_utils as common; use oxide_vpc::api::BOUNDARY_SERVICES_VNI; use oxide_vpc::api::ExternalIpCfg; use oxide_vpc::api::FirewallRule; +use oxide_vpc::api::Route; use oxide_vpc::api::RouterClass; use oxide_vpc::api::VpcCfg; use pcap::*; @@ -213,7 +214,7 @@ fn port_transition_pause() { "action=allow priority=10 dir=in protocol=tcp port=80".parse().unwrap(); firewall::add_fw_rule( &g1.port, - &AddFwRuleReq { port_name: g1.port.name().to_string(), rule: fw_rule }, + AddFwRuleReq { port_name: g1.port.name().to_string(), rule: fw_rule }, ) .unwrap(); incr!(g1, ["epoch", "firewall.rules.in"]); @@ -270,11 +271,14 @@ fn port_transition_pause() { assert!(matches!( router::del_entry( &g2.port, - IpCidr::Ip4(g2_cfg.ipv4_cfg().unwrap().vpc_subnet), - RouterTarget::VpcSubnet(IpCidr::Ip4( - g2_cfg.ipv4_cfg().unwrap().vpc_subnet - )), - RouterClass::System, + Route { + dest: IpCidr::Ip4(g2_cfg.ipv4_cfg().unwrap().vpc_subnet), + target: RouterTarget::VpcSubnet(IpCidr::Ip4( + g2_cfg.ipv4_cfg().unwrap().vpc_subnet + )), + class: RouterClass::System, + stat_id: None, + } ), Err(OpteError::BadState(_)) )); @@ -287,7 +291,7 @@ fn port_transition_pause() { // This exercises Port::add_rule(). let res = firewall::add_fw_rule( &g2.port, - &AddFwRuleReq { + AddFwRuleReq { port_name: g2.port.name().to_string(), rule: fw_rule.clone(), }, @@ -295,7 +299,7 @@ fn port_transition_pause() { assert!(matches!(res, Err(OpteError::BadState(_)))); let res = firewall::set_fw_rules( &g2.port, - &SetFwRulesReq { + SetFwRulesReq { port_name: g2.port.name().to_string(), rules: vec![fw_rule], }, @@ -332,7 +336,7 @@ fn add_remove_fw_rule() { let rule = "dir=in action=allow priority=10 protocol=TCP"; firewall::add_fw_rule( &g1.port, - &AddFwRuleReq { + AddFwRuleReq { port_name: g1.port.name().to_string(), rule: rule.parse().unwrap(), }, @@ -343,7 +347,7 @@ fn add_remove_fw_rule() { // Remove the rule just added, by ID. 
firewall::rem_fw_rule( &g1.port, - &oxide_vpc::api::RemFwRuleReq { + oxide_vpc::api::RemFwRuleReq { port_name: g1.port.name().to_string(), dir: In, id: 0, @@ -496,9 +500,14 @@ fn guest_to_guest_no_route() { // Make sure the router is configured to drop all packets. router::del_entry( &g1.port, - IpCidr::Ip4(g1_cfg.ipv4().vpc_subnet), - RouterTarget::VpcSubnet(IpCidr::Ip4(g1_cfg.ipv4().vpc_subnet)), - RouterClass::System, + Route { + dest: IpCidr::Ip4(g1_cfg.ipv4().vpc_subnet), + target: RouterTarget::VpcSubnet(IpCidr::Ip4( + g1_cfg.ipv4().vpc_subnet, + )), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); update!(g1, ["incr:epoch", "set:router.rules.out=0"]); @@ -547,7 +556,7 @@ fn guest_to_guest() { let rule = "dir=in action=allow priority=10 protocol=TCP"; firewall::add_fw_rule( &g2.port, - &AddFwRuleReq { + AddFwRuleReq { port_name: g2.port.name().to_string(), rule: rule.parse().unwrap(), }, @@ -716,7 +725,7 @@ fn guest_to_guest_diff_vpc_no_peer() { let rule = "dir=in action=allow priority=10 protocol=TCP"; firewall::add_fw_rule( &g2.port, - &AddFwRuleReq { + AddFwRuleReq { port_name: g2.port.name().to_string(), rule: rule.parse().unwrap(), }, @@ -757,9 +766,12 @@ fn guest_to_internet_ipv4() { // Add router entry that allows g1 to route to internet. router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -869,9 +881,12 @@ fn guest_to_internet_ipv6() { // Add router entry that allows g1 to route to internet. 
router::add_entry( &g1.port, - IpCidr::Ip6("::/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip6("::/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -1049,17 +1064,23 @@ fn multi_external_ip_setup( // Add router entry that allows g1 to route to internet. router::add_entry( &g1.port, - IpCidr::Ip6("::/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip6("::/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -1068,7 +1089,7 @@ fn multi_external_ip_setup( let rule = "dir=in action=allow priority=10 protocol=TCP"; firewall::add_fw_rule( &g1.port, - &AddFwRuleReq { + AddFwRuleReq { port_name: g1.port.name().to_string(), rule: rule.parse().unwrap(), }, @@ -1671,17 +1692,23 @@ fn snat_icmp_shared_echo_rewrite(dst_ip: IpAddr) { // Add router entries that allow g1 to route to internet. 
router::add_entry( &g1.port, - IpCidr::Ip6("::/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip6("::/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -2565,9 +2592,12 @@ fn outbound_ndp_dropped() { router::add_entry( &g1.port, - IpCidr::Ip6(ipv6.vpc_subnet), - RouterTarget::VpcSubnet(IpCidr::Ip6(ipv6.vpc_subnet)), - RouterClass::System, + Route { + dest: IpCidr::Ip6(ipv6.vpc_subnet), + target: RouterTarget::VpcSubnet(IpCidr::Ip6(ipv6.vpc_subnet)), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["router.rules.out", "epoch"]); @@ -2575,9 +2605,12 @@ fn outbound_ndp_dropped() { // Add router entry that allows g1 to route to internet. router::add_entry( &g1.port, - IpCidr::Ip6("::/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip6("::/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["router.rules.out", "epoch"]); @@ -3078,9 +3111,12 @@ fn uft_lft_invalidation_out() { // Add default route. 
router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -3097,7 +3133,7 @@ fn uft_lft_invalidation_out() { let any_out = "dir=out action=deny priority=65535 protocol=any"; firewall::set_fw_rules( &g1.port, - &SetFwRulesReq { + SetFwRulesReq { port_name: g1.port.name().to_string(), rules: vec![any_out.parse().unwrap()], }, @@ -3168,9 +3204,12 @@ fn uft_lft_invalidation_in() { // Add default route. router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -3221,7 +3260,7 @@ fn uft_lft_invalidation_in() { let any_out = "dir=out action=deny priority=65535 protocol=any"; firewall::set_fw_rules( &g1.port, - &SetFwRulesReq { + SetFwRulesReq { port_name: g1.port.name().to_string(), rules: vec![any_out.parse().unwrap()], }, @@ -3486,9 +3525,12 @@ fn tcp_outbound() { // Add default route. 
router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -3537,7 +3579,7 @@ fn early_tcp_invalidation() { let rule = "dir=in action=allow priority=10 protocol=TCP"; firewall::add_fw_rule( &g1.port, - &AddFwRuleReq { + AddFwRuleReq { port_name: g1.port.name().to_string(), rule: rule.parse().unwrap(), }, @@ -3548,9 +3590,12 @@ fn early_tcp_invalidation() { // Add default route. router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -3737,9 +3782,12 @@ fn ephemeral_ip_preferred_over_snat_outbound() { // Add default route. router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -3829,9 +3877,12 @@ fn tcp_inbound() { // Add default route. 
router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -4112,9 +4163,12 @@ fn no_panic_on_flow_table_full() { // Add router entry that allows g1 to route to internet. router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(None), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(None), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -4164,9 +4218,12 @@ fn intra_subnet_routes_with_custom() { let cidr = IpCidr::Ip4("172.30.4.0/22".parse().unwrap()); router::add_entry( &g1.port, - cidr, - RouterTarget::VpcSubnet(cidr), - RouterClass::System, + Route { + dest: cidr, + target: RouterTarget::VpcSubnet(cidr), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -4215,8 +4272,16 @@ fn intra_subnet_routes_with_custom() { // Suppose the user now installs a 'custom' route in the first subnet to // drop traffic towards the second subnet. This rule must take priority. - router::add_entry(&g1.port, cidr, RouterTarget::Drop, RouterClass::Custom) - .unwrap(); + router::add_entry( + &g1.port, + Route { + dest: cidr, + target: RouterTarget::Drop, + class: RouterClass::Custom, + stat_id: None, + }, + ) + .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); let mut pkt2_m = gen_icmpv4_echo_req( g1_cfg.guest_mac, @@ -4246,8 +4311,16 @@ fn intra_subnet_routes_with_custom() { ); // When the user removes this rule, traffic may flow again to subnet 2. 
- router::del_entry(&g1.port, cidr, RouterTarget::Drop, RouterClass::Custom) - .unwrap(); + router::del_entry( + &g1.port, + Route { + dest: cidr, + target: RouterTarget::Drop, + class: RouterClass::Custom, + stat_id: None, + }, + ) + .unwrap(); update!(g1, ["incr:epoch", "decr:router.rules.out"]); let mut pkt3_m = gen_icmpv4_echo_req( g1_cfg.guest_mac, @@ -4288,9 +4361,12 @@ fn port_as_router_target() { let dst_ip: Ipv4Addr = "192.168.0.1".parse().unwrap(); router::add_entry( &g1.port, - cidr, - RouterTarget::Ip(g2_cfg.ipv4().private_ip.into()), - RouterClass::Custom, + Route { + dest: cidr, + target: RouterTarget::Ip(g2_cfg.ipv4().private_ip.into()), + class: RouterClass::Custom, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -4451,9 +4527,12 @@ fn select_eip_conditioned_on_igw() { // Add default route. router::add_entry( &g1.port, - IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), - RouterTarget::InternetGateway(Some(default_igw)), - RouterClass::System, + Route { + dest: IpCidr::Ip4("0.0.0.0/0".parse().unwrap()), + target: RouterTarget::InternetGateway(Some(default_igw)), + class: RouterClass::System, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -4461,33 +4540,45 @@ fn select_eip_conditioned_on_igw() { // Add custom inetgw routes. 
router::add_entry( &g1.port, - IpCidr::Ip4("1.1.1.0/24".parse().unwrap()), - RouterTarget::InternetGateway(Some(custom_igw0)), - RouterClass::Custom, + Route { + dest: IpCidr::Ip4("1.1.1.0/24".parse().unwrap()), + target: RouterTarget::InternetGateway(Some(custom_igw0)), + class: RouterClass::Custom, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); router::add_entry( &g1.port, - IpCidr::Ip4("2.2.2.0/24".parse().unwrap()), - RouterTarget::InternetGateway(Some(custom_igw1)), - RouterClass::Custom, + Route { + dest: IpCidr::Ip4("2.2.2.0/24".parse().unwrap()), + target: RouterTarget::InternetGateway(Some(custom_igw1)), + class: RouterClass::Custom, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); router::add_entry( &g1.port, - IpCidr::Ip4("3.3.3.0/24".parse().unwrap()), - RouterTarget::InternetGateway(Some(ipless_igw)), - RouterClass::Custom, + Route { + dest: IpCidr::Ip4("3.3.3.0/24".parse().unwrap()), + target: RouterTarget::InternetGateway(Some(ipless_igw)), + class: RouterClass::Custom, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); router::add_entry( &g1.port, - IpCidr::Ip4("4.4.4.0/24".parse().unwrap()), - RouterTarget::InternetGateway(Some(all_ips_igw)), - RouterClass::Custom, + Route { + dest: IpCidr::Ip4("4.4.4.0/24".parse().unwrap()), + target: RouterTarget::InternetGateway(Some(all_ips_igw)), + class: RouterClass::Custom, + stat_id: None, + }, ) .unwrap(); incr!(g1, ["epoch", "router.rules.out"]); @@ -4739,7 +4830,7 @@ fn icmpv6_inner_has_nat_applied() { let rule = "dir=in action=allow priority=9 protocol=ICMP6"; firewall::add_fw_rule( &g1.port, - &AddFwRuleReq { + AddFwRuleReq { port_name: g1.port.name().to_string(), rule: rule.parse().unwrap(), }, diff --git a/xde-tests/src/lib.rs b/xde-tests/src/lib.rs index 7d39f80a..1ab57126 100644 --- a/xde-tests/src/lib.rs +++ b/xde-tests/src/lib.rs @@ -24,6 +24,7 @@ use oxide_vpc::api::Ipv6Addr; use oxide_vpc::api::MacAddr; use 
oxide_vpc::api::PhysNet; use oxide_vpc::api::Ports; +use oxide_vpc::api::Route; use oxide_vpc::api::RouterClass; use oxide_vpc::api::RouterTarget; use oxide_vpc::api::SNat4Cfg; @@ -119,9 +120,12 @@ impl OptePort { let adm = OpteHdl::open()?; adm.add_router_entry(&AddRouterEntryReq { port_name: self.name.clone(), - dest: IpCidr::Ip4(format!("{}/32", dest).parse().unwrap()), - target: RouterTarget::Ip(dest.parse().unwrap()), - class: RouterClass::System, + route: Route { + dest: IpCidr::Ip4(format!("{}/32", dest).parse().unwrap()), + target: RouterTarget::Ip(dest.parse().unwrap()), + class: RouterClass::System, + stat_id: None, + }, })?; Ok(()) } @@ -139,6 +143,7 @@ impl OptePort { action: FirewallAction::Allow, priority: 0, filters, + stat_id: None, }, })?; diff --git a/xde/src/xde.rs b/xde/src/xde.rs index 15782122..8cae6be0 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -2287,7 +2287,7 @@ fn add_router_entry_hdlr(env: &mut IoctlEnvelope) -> Result { .get_by_name(&req.port_name) .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; - router::add_entry(&dev.port, req.dest, req.target, req.class) + router::add_entry(&dev.port, req.route) } #[unsafe(no_mangle)] @@ -2301,7 +2301,7 @@ fn del_router_entry_hdlr( .get_by_name(&req.port_name) .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; - router::del_entry(&dev.port, req.dest, req.target, req.class) + router::del_entry(&dev.port, req.route) } #[unsafe(no_mangle)] @@ -2313,7 +2313,7 @@ fn add_fw_rule_hdlr(env: &mut IoctlEnvelope) -> Result { .get_by_name(&req.port_name) .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; - firewall::add_fw_rule(&dev.port, &req)?; + firewall::add_fw_rule(&dev.port, req)?; Ok(NoResp::default()) } @@ -2326,7 +2326,7 @@ fn rem_fw_rule_hdlr(env: &mut IoctlEnvelope) -> Result { .get_by_name(&req.port_name) .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; - firewall::rem_fw_rule(&dev.port, &req)?; + firewall::rem_fw_rule(&dev.port, 
req)?; Ok(NoResp::default()) } @@ -2339,7 +2339,7 @@ fn set_fw_rules_hdlr(env: &mut IoctlEnvelope) -> Result { .get_by_name(&req.port_name) .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; - firewall::set_fw_rules(&dev.port, &req)?; + firewall::set_fw_rules(&dev.port, req)?; Ok(NoResp::default()) } From 9bd2b3639568bab22041065e6421b91d0d883846 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 27 Jun 2025 20:28:09 +0100 Subject: [PATCH 20/37] XXX thinking through actions being able to push RootStats --- lib/opte/src/engine/layer.rs | 1 + lib/opte/src/engine/packet.rs | 13 +++++++++++++ lib/opte/src/engine/stat.rs | 35 ++++++++++++++++++++++++++++++++--- 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index f449cfdb..c2d6f289 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -1281,6 +1281,7 @@ impl Layer { if !matches!(action, Action::StatefulAllow | Action::Stateful(_)) { pkt.meta_mut().stats.push(stat.take().unwrap()); } + pkt.meta_mut().stats match action { Action::Allow => Ok(LayerResult::Allow), diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index ef75a9e9..695c7323 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -6,6 +6,8 @@ //! Types for creating, reading, and writing network packets. +use super::stat::RootStat; +use super::stat::StatParent; use super::Direction; use super::LightweightMeta; use super::NetworkParser; @@ -636,6 +638,17 @@ impl PacketData { csum != 0 } + + /// + /// TODO::::::::: + /// + /// Need to rethink this. This *should* be &mut, but we don't + /// want anything else in here to be mut to protect OPTE's design + /// (i.e., actions don't *actually* modify packets). So we maybe + /// need a view type preventing mut use of the other fields? 
+ pub fn push_stat(&mut self, stat: RootStat) { + self.stats.push(stat.into()); + } } impl From<&PacketData> for InnerFlowId { diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index 305f75e2..a9e0989c 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -804,8 +804,28 @@ fn get_base_ids(parents: &[StatParent]) -> BTreeSet { /// Collects stats as a packet is processed, keeping track of the boundary /// of the most recent layer. /// -/// TODO: there are soundness rules to prevent double-counting if different -/// expiries occur at different times. Codify these. +/// ## Ensuring exact counting +/// For stats to be measured exactly (i.e., without any nondeterministic +/// double/triple-counting) you must ensure that your [`NetworkImpl`] is designed +/// so that each [`RootStat`] you define is only reachable by at most one path +/// from any flow. Duplicate root stats (in a flow or internal node) are +/// trivially filtered out, but reusing a [`RootStat`] in, e.g., a layer which +/// generates an LFT entry and then as the rule-stat in a stateless layer poses +/// problems. +/// +/// I.e., consider the below case: +/// ```text +/// flow(abcd)[ RootStat(0), RootStat(1), InternalNode(2), RootStat(3) ] +/// ^ +/// | +/// [ RootStat(1), RootStat(4), ... ] +/// ``` +/// `InternalNode(2)` could expire at a *later time* than `flow(abcd)`, +/// which means that it and `RootStat(1)` will inherit the flow stats on +/// its closure, and then RootStat(1) will inherit these *again* once +/// `InternalNode(2)` expires. +/// +/// [`NetworkImpl`]: super::NetworkImpl pub struct FlowStatBuilder { parents: Vec, layer_end: usize, @@ -821,7 +841,7 @@ impl FlowStatBuilder { } /// Push a parent onto this flow. 
- pub fn push(&mut self, parent: impl Into) { + pub fn push(&mut self, parent: StatParent) { self.parents.push(parent.into()); } @@ -830,6 +850,15 @@ impl FlowStatBuilder { self.layer_end = self.parents.len(); } + /// Mark all current parents as [`Action::Allow`], moving them all into + /// a new [`InternalStat`]. + pub fn new_layer_lft(&mut self, tree: &mut StatTree) -> Arc { + let out = tree.new_intermediate(self.parents.split_off(self.layer_end)); + self.parents.push(out.clone().into()); + self.new_layer(); + out + } + /// Return a list of stat parents if this packet is bound for flow creation. pub fn terminate( &mut self, From 42c7f28c2e37f3833a66c00c3becdb934dbcca61 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 30 Jun 2025 11:45:34 +0100 Subject: [PATCH 21/37] `InternalStat`s inclusive of action state. But no action state to exercise it, yet. --- lib/opte/src/engine/layer.rs | 55 +++++++++------------------------ lib/opte/src/engine/packet.rs | 3 +- lib/opte/src/engine/stat.rs | 57 +++++++++++++++++------------------ 3 files changed, 44 insertions(+), 71 deletions(-) diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index c2d6f289..2ead8a66 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -233,7 +233,7 @@ impl LayerFlowTable { // We add unchecked because the limit is now enforced by // LayerFlowTable, not the individual flow tables. 
let in_entry = - LftInEntry { action_desc: action_desc.clone(), stat: stat.clone() }; + LftInEntry { action_desc: action_desc.clone(), stat: Arc::clone(&stat) }; self.ft_in.add_unchecked(in_flow, in_entry); let out_entry = LftOutEntry { in_flow_pair: in_flow, action_desc, stat }; @@ -278,7 +278,7 @@ impl LayerFlowTable { Some(entry) => { entry.hit(); let action = entry.state().action_desc.clone(); - let stat = entry.state().stat.clone(); + let stat = Arc::clone(&entry.state().stat); if entry.is_dirty() { EntryState::Dirty(action, stat) } else { @@ -295,7 +295,7 @@ impl LayerFlowTable { Some(entry) => { entry.hit(); let action = entry.state().action_desc.clone(); - let stat = entry.state().stat.clone(); + let stat = Arc::clone(&entry.state().stat); if entry.is_dirty() { EntryState::Dirty(action, stat) } else { @@ -889,7 +889,7 @@ impl Layer { }; if let Some(stat) = stat { - pkt.meta_mut().stats.push(stat); + pkt.meta_mut().stats.push(stat.into()); } match action { @@ -945,20 +945,14 @@ impl Layer { let (action, stat) = if let Some(rule) = rule { self.stats.vals.in_rule_match += 1; - (rule.rule.action(), rule.stat.clone()) + (rule.rule.action(), Arc::clone(&rule.stat)) } else { self.stats.vals.in_rule_nomatch += 1; self.default_in_hits += 1; - (self.default_in.into(), self.default_in_stat.clone()) + (self.default_in.into(), Arc::clone(&self.default_in_stat)) }; - // No LFT to account for. - // TODO: figure out how to have actions push on some IDs - // that then belong to the LFT. 
- let mut stat = Some(stat); - if !matches!(action, Action::StatefulAllow | Action::Stateful(_)) { - pkt.meta_mut().stats.push(stat.take().unwrap()); - } + pkt.meta_mut().stats.push(stat.into()); match action { Action::Allow => Ok(LayerResult::Allow), @@ -972,10 +966,7 @@ impl Layer { }); } - let stat = ectx - .stats - .new_intermediate(vec![stat.take().unwrap().into()]); - pkt.meta_mut().stats.push(stat.clone()); + let stat = pkt.meta_mut().stats.new_layer_lft(&mut ectx.stats); // The outbound flow ID mirrors the inbound. Remember, // the "top" of layer represents how the client sees @@ -1136,10 +1127,7 @@ impl Layer { } } - let stat = ectx - .stats - .new_intermediate(vec![stat.take().unwrap().into()]); - pkt.meta_mut().stats.push(stat.clone()); + let stat = pkt.meta_mut().stats.new_layer_lft(&mut ectx.stats); // The outbound flow ID must be calculated _after_ the // header transformation. Remember, the "top" @@ -1211,7 +1199,7 @@ impl Layer { }; if let Some(stat) = stat { - pkt.meta_mut().stats.push(stat); + pkt.meta_mut().stats.push(stat.into()); } match action { @@ -1267,21 +1255,14 @@ impl Layer { let (action, stat) = if let Some(rule) = rule { self.stats.vals.out_rule_match += 1; - (rule.rule.action(), rule.stat.clone()) + (rule.rule.action(), Arc::clone(&rule.stat)) } else { self.stats.vals.out_rule_nomatch += 1; self.default_out_hits += 1; - (self.default_out.into(), self.default_out_stat.clone()) + (self.default_out.into(), Arc::clone(&self.default_out_stat)) }; - // No LFT to account for. - // TODO: figure out how to have actions push on some IDs - // that then belong to the LFT. 
- let mut stat = Some(stat); - if !matches!(action, Action::StatefulAllow | Action::Stateful(_)) { - pkt.meta_mut().stats.push(stat.take().unwrap()); - } - pkt.meta_mut().stats + pkt.meta_mut().stats.push(stat.into()); match action { Action::Allow => Ok(LayerResult::Allow), @@ -1295,10 +1276,7 @@ impl Layer { }); } - let stat = ectx - .stats - .new_intermediate(vec![stat.take().unwrap().into()]); - pkt.meta_mut().stats.push(stat.clone()); + let stat = pkt.meta_mut().stats.new_layer_lft(&mut ectx.stats); // The inbound flow ID must be calculated _after_ the // header transformation. Remember, the "top" @@ -1422,10 +1400,7 @@ impl Layer { }); } - let stat = ectx - .stats - .new_intermediate(vec![stat.take().unwrap().into()]); - pkt.meta_mut().stats.push(stat.clone()); + let stat = pkt.meta_mut().stats.new_layer_lft(&mut ectx.stats); let desc = match action.gen_desc(pkt.flow(), pkt, ameta) { Ok(aord) => match aord { diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 695c7323..7dfcc8e7 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -7,7 +7,6 @@ //! Types for creating, reading, and writing network packets. use super::stat::RootStat; -use super::stat::StatParent; use super::Direction; use super::LightweightMeta; use super::NetworkParser; @@ -646,7 +645,7 @@ impl PacketData { /// want anything else in here to be mut to protect OPTE's design /// (i.e., actions don't *actually* modify packets). So we maybe /// need a view type preventing mut use of the other fields? 
- pub fn push_stat(&mut self, stat: RootStat) { + pub fn push_stat(&mut self, stat: Arc) { self.stats.push(stat.into()); } } diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index a9e0989c..7c0f83ab 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -496,7 +496,7 @@ impl StatTree { let uuid = uuid.unwrap_or_else(|| Uuid::from_u64_pair(0, self.next_id)); let ids = &mut self.next_id; - self.roots + Arc::clone(self.roots .entry(uuid) .or_insert_with(|| { Arc::new(RootStat { @@ -507,8 +507,7 @@ impl StatTree { stats: FullCounter::from_next_id(ids), }, }) - }) - .clone() + })) } /// Creates a new internal node from a given set of parents. @@ -528,7 +527,7 @@ impl StatTree { parent.append_child(&out); } - self.internal.push(out.clone()); + self.internal.push(Arc::clone(&out)); out } @@ -556,7 +555,7 @@ impl StatTree { // -- flow closes // In the above example, b' and c should receive the packet // byte/counts split at the epoch 0->1 transition. - return e.get().clone(); + return Arc::clone(e.get()); } let parents = parents.into_boxed_slice(); @@ -569,7 +568,7 @@ impl StatTree { partner: *partner_flow, parents, bases, - shared: partner.get().shared.clone(), + shared: Arc::clone(&partner.get().shared), last_hit: Moment::now().raw().into(), }), // Miss, no partner. @@ -595,7 +594,7 @@ impl StatTree { } // We have proven a miss on flow_id already - let _ = self.flows.insert(*flow_id, out.clone()); + let _ = self.flows.insert(*flow_id, Arc::clone(&out)); out } @@ -854,7 +853,7 @@ impl FlowStatBuilder { /// a new [`InternalStat`]. 
pub fn new_layer_lft(&mut self, tree: &mut StatTree) -> Arc { let out = tree.new_intermediate(self.parents.split_off(self.layer_end)); - self.parents.push(out.clone().into()); + self.parents.push(Arc::clone(&out).into()); self.new_layer(); out } @@ -989,12 +988,12 @@ mod tests { let i1 = tree.new_intermediate(vec![r2.into()]); let mut fb = FlowStatBuilder::new(); - fb.push(i0.clone()); + fb.push(Arc::clone(&i0).into()); fb.new_layer(); - fb.push(r1.clone()); + fb.push(Arc::clone(&r1).into()); fb.new_layer(); - fb.push(i1.clone()); - fb.push(r3.clone()); + fb.push(Arc::clone(&i1).into()); + fb.push(Arc::clone(&r3).into()); assert!( fb.terminate(Action::Deny, 128, Direction::Out, false).is_none() @@ -1025,7 +1024,7 @@ mod tests { // Does this work with only one layer? let mut fb = FlowStatBuilder::new(); - fb.push(i0.clone()); + fb.push(Arc::clone(&i0).into()); assert!( fb.terminate(Action::Deny, 64, Direction::Out, false).is_none() ); @@ -1046,14 +1045,14 @@ mod tests { let r2 = tree.root(Some(ROOT_2)); let r3 = tree.root(Some(ROOT_3)); - let i0 = tree.new_intermediate(vec![r0.clone().into()]); - let i1 = tree.new_intermediate(vec![r1.clone().into()]); + let i0 = tree.new_intermediate(vec![Arc::clone(&r0).into()]); + let i1 = tree.new_intermediate(vec![Arc::clone(&r1).into()]); let p_sz = 64; let f_out = { let mut fb = FlowStatBuilder::new(); - fb.push(i0.clone()); - fb.push(r3.clone()); + fb.push(Arc::clone(&i0).into()); + fb.push(Arc::clone(&r3).into()); tree.new_flow( &FLOW_OUT, &FLOW_IN, @@ -1067,9 +1066,9 @@ mod tests { let f_in = { let mut fb = FlowStatBuilder::new(); - fb.push(i0.clone()); - fb.push(i1.clone()); - fb.push(r2.clone()); + fb.push(Arc::clone(&i0).into()); + fb.push(Arc::clone(&i1).into()); + fb.push(Arc::clone(&r2).into()); tree.new_flow( &FLOW_IN, &FLOW_OUT, @@ -1179,12 +1178,12 @@ mod tests { let r2 = tree.root(Some(ROOT_2)); let r3 = tree.root(Some(ROOT_3)); - let i0 = tree.new_intermediate(vec![r0.clone().into()]); - let i1 = 
tree.new_intermediate(vec![r1.clone().into()]); + let i0 = tree.new_intermediate(vec![Arc::clone(&r0).into()]); + let i1 = tree.new_intermediate(vec![Arc::clone(&r1).into()]); let f0_out = { let mut fb = FlowStatBuilder::new(); - fb.push(i0.clone()); + fb.push(Arc::clone(&i0).into()); tree.new_flow( &FLOW_OUT, &FLOW_IN, @@ -1196,9 +1195,9 @@ mod tests { let f0_in = { let mut fb = FlowStatBuilder::new(); - fb.push(i0.clone()); - fb.push(i1.clone()); - fb.push(r2.clone()); + fb.push(Arc::clone(&i0).into()); + fb.push(Arc::clone(&i1).into()); + fb.push(Arc::clone(&r2).into()); tree.new_flow( &FLOW_IN, &FLOW_OUT, @@ -1210,9 +1209,9 @@ mod tests { let f1_out = { let mut fb = FlowStatBuilder::new(); - fb.push(i0.clone()); - fb.push(r2.clone()); - fb.push(r3.clone()); + fb.push(Arc::clone(&i0).into()); + fb.push(Arc::clone(&r2).into()); + fb.push(Arc::clone(&r3).into()); tree.new_flow( &FLOW_OUT_2, &FLOW_IN_2, From 034e268452f6de4eeba5cc1099f46cfce48fcae6 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 30 Jun 2025 11:51:27 +0100 Subject: [PATCH 22/37] ...and `fmt` --- lib/opte/src/engine/layer.rs | 6 ++++-- lib/opte/src/engine/packet.rs | 6 +++--- lib/opte/src/engine/stat.rs | 24 +++++++++++------------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index 2ead8a66..51f11ab4 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -232,8 +232,10 @@ impl LayerFlowTable { ) { // We add unchecked because the limit is now enforced by // LayerFlowTable, not the individual flow tables. 
- let in_entry = - LftInEntry { action_desc: action_desc.clone(), stat: Arc::clone(&stat) }; + let in_entry = LftInEntry { + action_desc: action_desc.clone(), + stat: Arc::clone(&stat), + }; self.ft_in.add_unchecked(in_flow, in_entry); let out_entry = LftOutEntry { in_flow_pair: in_flow, action_desc, stat }; diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 7dfcc8e7..d93d598a 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -6,7 +6,6 @@ //! Types for creating, reading, and writing network packets. -use super::stat::RootStat; use super::Direction; use super::LightweightMeta; use super::NetworkParser; @@ -36,6 +35,7 @@ use super::rule::CompiledTransform; use super::rule::HdrTransform; use super::rule::HdrTransformError; use super::stat::FlowStatBuilder; +use super::stat::RootStat; pub use crate::api::AddrPair; pub use crate::api::FLOW_ID_DEFAULT; use crate::api::IcmpInfo; @@ -638,9 +638,9 @@ impl PacketData { csum != 0 } - /// + /// /// TODO::::::::: - /// + /// /// Need to rethink this. This *should* be &mut, but we don't /// want anything else in here to be mut to protect OPTE's design /// (i.e., actions don't *actually* modify packets). 
So we maybe diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index 7c0f83ab..6b78cc81 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -496,18 +496,16 @@ impl StatTree { let uuid = uuid.unwrap_or_else(|| Uuid::from_u64_pair(0, self.next_id)); let ids = &mut self.next_id; - Arc::clone(self.roots - .entry(uuid) - .or_insert_with(|| { - Arc::new(RootStat { - id: uuid, - last_hit: Moment::now().raw().into(), - body: TableStat { - children: KRwLock::new(vec![]), - stats: FullCounter::from_next_id(ids), - }, - }) - })) + Arc::clone(self.roots.entry(uuid).or_insert_with(|| { + Arc::new(RootStat { + id: uuid, + last_hit: Moment::now().raw().into(), + body: TableStat { + children: KRwLock::new(vec![]), + stats: FullCounter::from_next_id(ids), + }, + }) + })) } /// Creates a new internal node from a given set of parents. @@ -811,7 +809,7 @@ fn get_base_ids(parents: &[StatParent]) -> BTreeSet { /// trivially filtered out, but reusing a [`RootStat`] in, e.g., a layer which /// generates an LFT entry and then as the rule-stat in a stateless layer poses /// problems. -/// +/// /// I.e., consider the below case: /// ```text /// flow(abcd)[ RootStat(0), RootStat(1), InternalNode(2), RootStat(3) ] From 48d9d64bd9f8b0b8bedb2f5ce4e30e3fa1feebd0 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 30 Jun 2025 12:16:02 +0100 Subject: [PATCH 23/37] Some clippy before I reshape the datapath, again. --- lib/opte/src/engine/layer.rs | 8 ++++---- lib/opte/src/engine/stat.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index 51f11ab4..81be926b 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -968,7 +968,7 @@ impl Layer { }); } - let stat = pkt.meta_mut().stats.new_layer_lft(&mut ectx.stats); + let stat = pkt.meta_mut().stats.new_layer_lft(ectx.stats); // The outbound flow ID mirrors the inbound. 
Remember, // the "top" of layer represents how the client sees @@ -1129,7 +1129,7 @@ impl Layer { } } - let stat = pkt.meta_mut().stats.new_layer_lft(&mut ectx.stats); + let stat = pkt.meta_mut().stats.new_layer_lft(ectx.stats); // The outbound flow ID must be calculated _after_ the // header transformation. Remember, the "top" @@ -1278,7 +1278,7 @@ impl Layer { }); } - let stat = pkt.meta_mut().stats.new_layer_lft(&mut ectx.stats); + let stat = pkt.meta_mut().stats.new_layer_lft(ectx.stats); // The inbound flow ID must be calculated _after_ the // header transformation. Remember, the "top" @@ -1402,7 +1402,7 @@ impl Layer { }); } - let stat = pkt.meta_mut().stats.new_layer_lft(&mut ectx.stats); + let stat = pkt.meta_mut().stats.new_layer_lft(ectx.stats); let desc = match action.gen_desc(pkt.flow(), pkt, ameta) { Ok(aord) => match aord { diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index 6b78cc81..20f0217a 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -839,7 +839,7 @@ impl FlowStatBuilder { /// Push a parent onto this flow. pub fn push(&mut self, parent: StatParent) { - self.parents.push(parent.into()); + self.parents.push(parent); } /// Mark all current parents as [`Action::Allow`]. From 7d62658dfa4f79299abe9e4efdb099461c17724f Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 30 Jun 2025 15:28:37 +0100 Subject: [PATCH 24/37] Pulling through some view stuff. Messy! 
--- lib/opte/src/engine/dhcp.rs | 3 +- lib/opte/src/engine/dhcpv6/protocol.rs | 11 +- lib/opte/src/engine/icmp/v4.rs | 3 +- lib/opte/src/engine/icmp/v6.rs | 7 +- lib/opte/src/engine/layer.rs | 153 +++++++------- lib/opte/src/engine/nat.rs | 10 +- lib/opte/src/engine/packet.rs | 255 +++++++++++++++++++----- lib/opte/src/engine/rule.rs | 12 +- lib/opte/src/engine/snat.rs | 20 +- lib/oxide-vpc/src/engine/gateway/mod.rs | 3 +- lib/oxide-vpc/src/engine/overlay.rs | 5 +- 11 files changed, 320 insertions(+), 162 deletions(-) diff --git a/lib/opte/src/engine/dhcp.rs b/lib/opte/src/engine/dhcp.rs index 3a80ad0c..1ddf1d41 100644 --- a/lib/opte/src/engine/dhcp.rs +++ b/lib/opte/src/engine/dhcp.rs @@ -9,6 +9,7 @@ use super::ether::Ethernet; use super::ip::v4::*; use super::packet::MblkPacketData; +use super::packet::MblkPacketDataView; use super::predicate::DataPredicate; use super::predicate::EtherAddrMatch; use super::predicate::IpProtoMatch; @@ -478,7 +479,7 @@ impl HairpinAction for DhcpAction { (hdr_preds, data_preds) } - fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { + fn gen_packet(&self, meta: MblkPacketDataView) -> GenPacketResult { let body = meta.copy_remaining(); let client_pkt = DhcpPacket::new_checked(&body)?; let client_dhcp = DhcpRepr::parse(&client_pkt)?; diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index 3d0caa1f..27344dd0 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -26,6 +26,7 @@ use crate::engine::ether::Ethernet; use crate::engine::ip::v6::Ipv6; use crate::engine::ip::v6::Ipv6Ref; use crate::engine::packet::MblkPacketData; +use crate::engine::packet::MblkPacketDataView; use crate::engine::predicate::DataPredicate; use crate::engine::predicate::EtherAddrMatch; use crate::engine::predicate::IpProtoMatch; @@ -582,10 +583,9 @@ fn process_confirm_message<'a>( } } -// Process a DHCPv6 message from the a client. 
+// Process a DHCPv6 message from a client. fn process_client_message<'a>( action: &'a Dhcpv6Action, - _meta: &'a MblkPacketData, client_msg: &'a Message<'a>, ) -> Option> { match client_msg.typ { @@ -607,7 +607,7 @@ fn process_client_message<'a>( // the request and the actual DHCPv6 message to send out. fn generate_packet<'a>( action: &Dhcpv6Action, - meta: &MblkPacketData, + meta: MblkPacketDataView, msg: &'a Message<'a>, ) -> GenPacketResult { let udp = Udp { @@ -667,11 +667,10 @@ impl HairpinAction for Dhcpv6Action { // Rather than put this logic into DataPredicates, we just parse the packet // here and reply accordingly. So the `Dhcpv6Action` is really a full // server, to the extent we emulate one. - fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { + fn gen_packet(&self, meta: MblkPacketDataView) -> GenPacketResult { let body = meta.copy_remaining(); if let Some(client_msg) = Message::from_bytes(&body) { - if let Some(reply) = process_client_message(self, meta, &client_msg) - { + if let Some(reply) = process_client_message(self, &client_msg) { generate_packet(self, meta, &reply) } else { Ok(AllowOrDeny::Deny) diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index 93aa1e52..98601757 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -12,6 +12,7 @@ use crate::engine::checksum::HeaderChecksum; use crate::engine::ether::Ethernet; use crate::engine::ip::v4::Ipv4; use crate::engine::packet::MblkPacketData; +use crate::engine::packet::MblkPacketDataView; use crate::engine::predicate::Ipv4AddrMatch; use ingot::ethernet::Ethertype; use ingot::icmp::IcmpV4; @@ -50,7 +51,7 @@ impl HairpinAction for IcmpEchoReply { (hdr_preds, vec![]) } - fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { + fn gen_packet(&self, meta: MblkPacketDataView) -> GenPacketResult { let Some(icmp) = meta.inner_icmp() else { // Getting here implies the predicate matched, but that the // extracted metadata 
indicates this isn't an ICMP packet. That diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index 5562c19c..aee80fc9 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -13,6 +13,7 @@ use crate::engine::ether::Ethernet; use crate::engine::ip::v6::Ipv6; use crate::engine::ip::v6::Ipv6Ref; use crate::engine::packet::MblkPacketData; +use crate::engine::packet::MblkPacketDataView; use crate::engine::predicate::Ipv6AddrMatch; use alloc::string::String; use ingot::ethernet::Ethertype; @@ -109,7 +110,7 @@ impl HairpinAction for Icmpv6EchoReply { (hdr_preds, vec![]) } - fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { + fn gen_packet(&self, meta: MblkPacketDataView) -> GenPacketResult { let Some(icmp6) = meta.inner_icmp6() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMPv6 packet. That @@ -234,7 +235,7 @@ impl HairpinAction for RouterAdvertisement { (hdr_preds, vec![]) } - fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { + fn gen_packet(&self, meta: MblkPacketDataView) -> GenPacketResult { use smoltcp::time::Duration; use smoltcp::wire::NdiscRouterFlags; @@ -552,7 +553,7 @@ impl HairpinAction for NeighborAdvertisement { (hdr_preds, vec![]) } - fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult { + fn gen_packet(&self, meta: MblkPacketDataView) -> GenPacketResult { let Some(icmp6) = meta.inner_icmp6() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMPv6 packet. That diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index 81be926b..b9cbf26d 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -914,13 +914,9 @@ impl Layer { pkt.flow(), ); - if let Some(body_segs) = pkt.body() { - if let Some(bt) = - desc.gen_bt(Direction::In, pkt.meta(), body_segs)? 
- { - pkt.body_transform(Direction::In, &*bt)?; - xforms.body.push(bt); - } + if let Some(bt) = desc.gen_bt(Direction::In, pkt.meta_view())? { + pkt.body_transform(Direction::In, &*bt)?; + xforms.body.push(bt); } Ok(LayerResult::Allow) @@ -955,6 +951,7 @@ impl Layer { }; pkt.meta_mut().stats.push(stat.into()); + let flow_before = *pkt.flow(); match action { Action::Allow => Ok(LayerResult::Allow), @@ -1007,8 +1004,12 @@ impl Layer { }, Action::Static(action) => { - let ht = match action.gen_ht(In, pkt.flow(), pkt.meta(), ameta) - { + let ht = match action.gen_ht( + In, + &flow_before, + pkt.meta_view(), + ameta, + ) { Ok(aord) => match aord { AllowOrDeny::Allow(ht) => ht, AllowOrDeny::Deny => { @@ -1033,7 +1034,6 @@ impl Layer { } }; - let flow_before = *pkt.flow(); pkt.hdr_transform(&ht)?; xforms.hdr.push(ht); ht_probe( @@ -1085,30 +1085,30 @@ impl Layer { }); } - // TODO: how on earth are we plumbing StatTree into here?? - - let desc = match action.gen_desc(pkt.flow(), pkt, ameta) { - Ok(aord) => match aord { - AllowOrDeny::Allow(desc) => desc, - - AllowOrDeny::Deny => { - return Ok(LayerResult::Deny { - name: self.name, - reason: DenyReason::Action, - }); + let desc = + match action.gen_desc(&flow_before, pkt.meta_view(), ameta) + { + Ok(aord) => match aord { + AllowOrDeny::Allow(desc) => desc, + + AllowOrDeny::Deny => { + return Ok(LayerResult::Deny { + name: self.name, + reason: DenyReason::Action, + }); + } + }, + + Err(e) => { + self.record_gen_desc_failure( + ectx.user_ctx, + In, + pkt.flow(), + &e, + ); + return Err(LayerError::GenDesc(e)); } - }, - - Err(e) => { - self.record_gen_desc_failure( - ectx.user_ctx, - In, - pkt.flow(), - &e, - ); - return Err(LayerError::GenDesc(e)); - } - }; + }; let flow_before = *pkt.flow(); let ht_in = desc.gen_ht(In); @@ -1122,11 +1122,9 @@ impl Layer { pkt.flow(), ); - if let Some(body_segs) = pkt.body() { - if let Some(bt) = desc.gen_bt(In, pkt.meta(), body_segs)? 
{ - pkt.body_transform(In, &*bt)?; - xforms.body.push(bt); - } + if let Some(bt) = desc.gen_bt(In, pkt.meta_view())? { + pkt.body_transform(In, &*bt)?; + xforms.body.push(bt); } let stat = pkt.meta_mut().stats.new_layer_lft(ectx.stats); @@ -1150,7 +1148,7 @@ impl Layer { } Action::Hairpin(action) => { - match action.gen_packet(pkt.meta()) { + match action.gen_packet(pkt.meta_view()) { Ok(AllowOrDeny::Allow(pkt)) => { Ok(LayerResult::Hairpin(pkt)) } @@ -1224,13 +1222,11 @@ impl Layer { pkt.flow(), ); - if let Some(body_segs) = pkt.body() { - if let Some(bt) = - desc.gen_bt(Direction::Out, pkt.meta(), body_segs)? - { - pkt.body_transform(Direction::Out, &*bt)?; - xforms.body.push(bt); - } + if let Some(bt) = + desc.gen_bt(Direction::Out, pkt.meta_view())? + { + pkt.body_transform(Direction::Out, &*bt)?; + xforms.body.push(bt); } Ok(LayerResult::Allow) @@ -1265,6 +1261,7 @@ impl Layer { }; pkt.meta_mut().stats.push(stat.into()); + let flow_before = *pkt.flow(); match action { Action::Allow => Ok(LayerResult::Allow), @@ -1324,8 +1321,12 @@ impl Layer { }, Action::Static(action) => { - let ht = match action.gen_ht(Out, pkt.flow(), pkt.meta(), ameta) - { + let ht = match action.gen_ht( + Out, + &flow_before, + pkt.meta_view(), + ameta, + ) { Ok(aord) => match aord { AllowOrDeny::Allow(ht) => ht, AllowOrDeny::Deny => { @@ -1350,7 +1351,6 @@ impl Layer { } }; - let flow_before = *pkt.flow(); pkt.hdr_transform(&ht)?; xforms.hdr.push(ht); ht_probe( @@ -1404,30 +1404,31 @@ impl Layer { let stat = pkt.meta_mut().stats.new_layer_lft(ectx.stats); - let desc = match action.gen_desc(pkt.flow(), pkt, ameta) { - Ok(aord) => match aord { - AllowOrDeny::Allow(desc) => desc, - - AllowOrDeny::Deny => { - return Ok(LayerResult::Deny { - name: self.name, - reason: DenyReason::Action, - }); + let desc = + match action.gen_desc(&flow_before, pkt.meta_view(), ameta) + { + Ok(aord) => match aord { + AllowOrDeny::Allow(desc) => desc, + + AllowOrDeny::Deny => { + return Ok(LayerResult::Deny { + 
name: self.name, + reason: DenyReason::Action, + }); + } + }, + + Err(e) => { + self.record_gen_desc_failure( + ectx.user_ctx, + Out, + pkt.flow(), + &e, + ); + return Err(LayerError::GenDesc(e)); } - }, + }; - Err(e) => { - self.record_gen_desc_failure( - ectx.user_ctx, - Out, - pkt.flow(), - &e, - ); - return Err(LayerError::GenDesc(e)); - } - }; - - let flow_before = *pkt.flow(); let ht_out = desc.gen_ht(Out); pkt.hdr_transform(&ht_out)?; xforms.hdr.push(ht_out); @@ -1439,11 +1440,9 @@ impl Layer { pkt.flow(), ); - if let Some(body_segs) = pkt.body() { - if let Some(bt) = desc.gen_bt(Out, pkt.meta(), body_segs)? { - pkt.body_transform(Out, &*bt)?; - xforms.body.push(bt); - } + if let Some(bt) = desc.gen_bt(Out, pkt.meta_view())? { + pkt.body_transform(Out, &*bt)?; + xforms.body.push(bt); } // The inbound flow ID must be calculated _after_ the @@ -1466,7 +1465,7 @@ impl Layer { } Action::Hairpin(action) => { - match action.gen_packet(pkt.meta()) { + match action.gen_packet(pkt.meta_view()) { Ok(AllowOrDeny::Allow(pkt)) => { Ok(LayerResult::Hairpin(pkt)) } diff --git a/lib/opte/src/engine/nat.rs b/lib/opte/src/engine/nat.rs index e8f7e190..b7b1958a 100644 --- a/lib/opte/src/engine/nat.rs +++ b/lib/opte/src/engine/nat.rs @@ -18,6 +18,7 @@ use super::packet::BodyTransform; use super::packet::BodyTransformError; use super::packet::InnerFlowId; use super::packet::MblkFullParsed; +use super::packet::MblkPacketDataView; use super::packet::Packet; use super::parse::Ulp; use super::parse::UlpRepr; @@ -105,7 +106,7 @@ impl StatefulAction for OutboundNat { fn gen_desc( &self, flow_id: &InnerFlowId, - _pkt: &Packet, + _pkt: MblkPacketDataView, _meta: &mut ActionMeta, ) -> rule::GenDescResult { // When we have several external IPs at our disposal, we are @@ -168,7 +169,7 @@ impl StatefulAction for InboundNat { fn gen_desc( &self, flow_id: &InnerFlowId, - _pkt: &Packet, + _pkt: MblkPacketDataView, _meta: &mut ActionMeta, ) -> rule::GenDescResult { // We rely on the attached 
predicates to filter out IPs which are *not* @@ -234,8 +235,7 @@ impl ActionDesc for NatDesc { fn gen_bt( &self, _dir: Direction, - meta: &super::packet::MblkPacketData, - _payload_seg: &[u8], + meta: MblkPacketDataView, ) -> Result>, rule::GenBtError> { // ICMPv4/v6 traffic can carry frames which they were generated // in response to. We need to also apply our NAT transform to @@ -459,7 +459,7 @@ mod test { // Verify descriptor generation. // ================================================================ let flow_out = InnerFlowId::from(pkt.meta()); - let desc = match nat.gen_desc(&flow_out, &pkt, &mut ameta) { + let desc = match nat.gen_desc(&flow_out, pkt.meta_view(), &mut ameta) { Ok(AllowOrDeny::Allow(desc)) => desc, _ => panic!("expected AllowOrDeny::Allow(desc) result"), }; diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index d93d598a..af8e6ad3 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -457,14 +457,173 @@ impl Drop for PktBodyWalker { } } +/// View of [`PacketData`] for use in action bodies. +pub struct PacketDataView<'a, T: Read + Pullup> { + pub headers: &'a OpteMeta, + pub initial_lens: &'a InitialLayerLens, + body: &'a PktBodyWalker, + pub stats: &'a mut FlowStatBuilder, +} + +impl core::fmt::Debug for PacketDataView<'_, T> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str("PacketHeaders(..)") + } +} + /// Packet state for the standard ULP path, or a full table walk over the slowpath. 
pub struct PacketData { pub(crate) headers: OpteMeta, - initial_lens: Option>, + initial_lens: InitialLayerLens, body: PktBodyWalker, pub(crate) stats: FlowStatBuilder, } +impl PacketDataView<'_, T> { + pub fn outer_ether( + &self, + ) -> Option<&InlineHeader>> { + self.headers.outer_eth.as_ref() + } + + pub fn outer_ip(&self) -> Option<&L3> { + self.headers.outer_l3.as_ref() + } + + /// Returns whether this packet is sourced from outside the rack, + /// in addition to its VNI. + pub fn outer_encap_geneve_vni_and_origin(&self) -> Option<(Vni, bool)> { + match &self.headers.outer_encap { + Some(InlineHeader::Repr(EncapMeta::Geneve(g))) => { + Some((g.vni, g.oxide_external_pkt)) + } + Some(InlineHeader::Raw(ValidEncapMeta::Geneve(_, g))) => { + Some((g.vni(), valid_geneve_has_oxide_external(g))) + } + None => None, + } + } + + pub fn inner_ether(&self) -> &EthernetPacket { + &self.headers.inner_eth + } + + pub fn inner_l3(&self) -> Option<&L3> { + self.headers.inner_l3.as_ref() + } + + pub fn inner_ulp(&self) -> Option<&Ulp> { + self.headers.inner_ulp.as_ref() + } + + pub fn inner_ip4(&self) -> Option<&Ipv4Packet> { + self.inner_l3().and_then(|v| match v { + L3::Ipv4(v) => Some(v), + _ => None, + }) + } + + pub fn inner_ip6(&self) -> Option<&Ipv6Packet> { + self.inner_l3().and_then(|v| match v { + L3::Ipv6(v) => Some(v), + _ => None, + }) + } + + pub fn inner_icmp(&self) -> Option<&IcmpV4Packet> { + self.inner_ulp().and_then(|v| match v { + Ulp::IcmpV4(v) => Some(v), + _ => None, + }) + } + + pub fn inner_icmp6(&self) -> Option<&IcmpV6Packet> { + self.inner_ulp().and_then(|v| match v { + Ulp::IcmpV6(v) => Some(v), + _ => None, + }) + } + + pub fn inner_tcp(&self) -> Option<&TcpPacket> { + self.inner_ulp().and_then(|v| match v { + Ulp::Tcp(v) => Some(v), + _ => None, + }) + } + + pub fn inner_udp(&self) -> Option<&UdpPacket> { + self.inner_ulp().and_then(|v| match v { + Ulp::Udp(v) => Some(v), + _ => None, + }) + } + + pub fn is_inner_tcp(&self) -> bool { + 
matches!(self.inner_ulp(), Some(Ulp::Tcp(_))) + } + + pub fn body(&self) -> &[u8] + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + self.body.body() + } + + pub fn copy_remaining(&self) -> Vec + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + let base = self.body(); + base.to_vec() + } + + pub fn append_remaining(&self, buf: &mut Vec) + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + let base = self.body(); + buf.extend_from_slice(base); + } + + /// Return whether the IP layer has a checksum both structurally + /// and that it is non-zero (i.e., not offloaded). + pub fn has_ip_csum(&self) -> bool { + match &self.headers.inner_l3 { + Some(L3::Ipv4(v4)) => v4.checksum() != 0, + Some(L3::Ipv6(_)) => false, + None => false, + } + } + + /// Return whether the ULP layer has a checksum both structurally + /// and that it is non-zero (i.e., not offloaded). + pub fn has_ulp_csum(&self) -> bool { + let csum = match &self.headers.inner_ulp { + Some(Ulp::Tcp(t)) => t.checksum(), + Some(Ulp::Udp(u)) => u.checksum(), + Some(Ulp::IcmpV4(i4)) => i4.checksum(), + Some(Ulp::IcmpV6(i6)) => i6.checksum(), + None => return false, + }; + + csum != 0 + } + + /// Push a rootstat + /// TODO::::::::: + /// + /// Need to rethink this. This *should* be &mut, but we don't + /// want anything else in here to be mut to protect OPTE's design + /// (i.e., actions don't *actually* modify packets). So we maybe + /// need a view type preventing mut use of the other fields? 
+ pub fn push_stat(&mut self, stat: Arc) { + self.stats.push(stat.into()); + } +} + impl From> for OpteMeta { #[inline] fn from(value: NoEncap) -> Self { @@ -486,8 +645,37 @@ impl core::fmt::Debug for PacketData { } impl PacketData { - pub fn initial_lens(&self) -> Option<&InitialLayerLens> { - self.initial_lens.as_deref() + pub fn view(&mut self) -> PacketDataView { + PacketDataView { + headers: &self.headers, + initial_lens: &self.initial_lens, + body: &self.body, + stats: &mut self.stats, + } + } + + pub fn prep_body(&mut self) + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + self.body.prepare() + } + + pub fn body(&self) -> &[u8] + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + self.body.body() + } + + pub fn body_mut(&mut self) -> &mut [u8] + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + self.body.body_mut() } pub fn outer_ether( @@ -572,22 +760,6 @@ impl PacketData { matches!(self.inner_ulp(), Some(Ulp::Tcp(_))) } - pub fn prep_body(&mut self) - where - T::Chunk: ByteSliceMut, - T: Pullup, - { - self.body.prepare() - } - - pub fn body(&self) -> &[u8] - where - T::Chunk: ByteSliceMut, - T: Pullup, - { - self.body.body() - } - pub fn copy_remaining(&self) -> Vec where T::Chunk: ByteSliceMut, @@ -606,14 +778,6 @@ impl PacketData { buf.extend_from_slice(base); } - pub fn body_mut(&mut self) -> &mut [u8] - where - T::Chunk: ByteSliceMut, - T: Pullup, - { - self.body.body_mut() - } - /// Return whether the IP layer has a checksum both structurally /// and that it is non-zero (i.e., not offloaded). pub fn has_ip_csum(&self) -> bool { @@ -637,17 +801,6 @@ impl PacketData { csum != 0 } - - /// - /// TODO::::::::: - /// - /// Need to rethink this. This *should* be &mut, but we don't - /// want anything else in here to be mut to protect OPTE's design - /// (i.e., actions don't *actually* modify packets). So we maybe - /// need a view type preventing mut use of the other fields? 
- pub fn push_stat(&mut self, stat: Arc) { - self.stats.push(stat.into()); - } } impl From<&PacketData> for InnerFlowId { @@ -783,17 +936,14 @@ where let flow = headers.flow(); let headers: OpteMeta<_> = headers.into(); - let initial_lens = Some( - InitialLayerLens { - outer_eth: headers.outer_eth.packet_length(), - outer_l3: headers.outer_l3.packet_length(), - outer_encap: headers.outer_encap.packet_length(), - inner_eth: headers.inner_eth.packet_length(), - inner_l3: headers.inner_l3.packet_length(), - inner_ulp: headers.inner_ulp.packet_length(), - } - .into(), - ); + let initial_lens = InitialLayerLens { + outer_eth: headers.outer_eth.packet_length(), + outer_l3: headers.outer_l3.packet_length(), + outer_encap: headers.outer_encap.packet_length(), + inner_eth: headers.inner_eth.packet_length(), + inner_l3: headers.inner_l3.packet_length(), + inner_ulp: headers.inner_ulp.packet_length(), + }; let body = PktBodyWalker::new(last_chunk, data); let meta = Box::new(PacketData { headers, @@ -843,6 +993,10 @@ where } impl Packet> { + pub fn meta_view(&mut self) -> PacketDataView { + self.state.meta.view() + } + pub fn meta(&self) -> &PacketData { &self.state.meta } @@ -873,7 +1027,7 @@ impl Packet> { // pkt space. let l4_hash = self.l4_hash(); let state = &mut self.state; - let init_lens = state.meta.initial_lens.as_ref().unwrap(); + let init_lens = &state.meta.initial_lens; let headers = &state.meta.headers; let payload_len = state.len - init_lens.hdr_len(); let mut encapped_len = payload_len; @@ -1435,6 +1589,7 @@ impl> LiteParsed {} // ActionDescs to be generic over T (trait object safety rules, etc.), // in addition to needing to rework Hairpin actions. 
pub type MblkPacketData<'a> = PacketData>; +pub type MblkPacketDataView<'a, 'b> = PacketDataView<'a, MsgBlkIterMut<'b>>; pub type MblkFullParsed<'a> = FullParsed>; pub type MblkLiteParsed<'a, M> = LiteParsed, M>; diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 7fe54844..085720c5 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -31,6 +31,7 @@ use super::packet::BodyTransform; use super::packet::InnerFlowId; use super::packet::MblkFullParsed; use super::packet::MblkPacketData; +use super::packet::MblkPacketDataView; use super::packet::Packet; use super::packet::PacketData; use super::packet::Pullup; @@ -179,8 +180,7 @@ pub trait ActionDesc { fn gen_bt( &self, _dir: Direction, - _meta: &MblkPacketData, - _payload_seg: &[u8], + _meta: MblkPacketDataView, ) -> Result>, GenBtError> { Ok(None) } @@ -277,7 +277,7 @@ impl StaticAction for Identity { &self, _dir: Direction, _flow_id: &InnerFlowId, - _pkt_meta: &MblkPacketData, + _pkt_meta: MblkPacketDataView, _action_meta: &mut ActionMeta, ) -> GenHtResult { Ok(AllowOrDeny::Allow(HdrTransform::identity(&self.name))) @@ -706,7 +706,7 @@ pub trait StatefulAction: Display { fn gen_desc( &self, flow_id: &InnerFlowId, - pkt: &Packet, + pkt: MblkPacketDataView, meta: &mut ActionMeta, ) -> GenDescResult; @@ -726,7 +726,7 @@ pub trait StaticAction: Display { &self, dir: Direction, flow_id: &InnerFlowId, - packet_meta: &MblkPacketData, + packet_meta: MblkPacketDataView, action_meta: &mut ActionMeta, ) -> GenHtResult; @@ -798,7 +798,7 @@ pub trait HairpinAction: Display { /// modifications made by previous layers up to this point. /// This also provides access to a reader over the packet body, /// positioned after the parsed metadata. - fn gen_packet(&self, meta: &MblkPacketData) -> GenPacketResult; + fn gen_packet(&self, meta: MblkPacketDataView) -> GenPacketResult; /// Return the predicates implicit to this action. 
/// diff --git a/lib/opte/src/engine/snat.rs b/lib/opte/src/engine/snat.rs index e8e4f121..248a39a5 100644 --- a/lib/opte/src/engine/snat.rs +++ b/lib/opte/src/engine/snat.rs @@ -13,6 +13,7 @@ use super::headers::UlpHeaderAction; use super::headers::UlpMetaModify; use super::packet::InnerFlowId; use super::packet::MblkFullParsed; +use super::packet::MblkPacketDataView; use super::packet::Packet; use super::port::meta::ActionMeta; use super::predicate::DataPredicate; @@ -243,13 +244,11 @@ impl SNat { fn gen_icmp_desc( &self, nat: SNatAlloc, - pkt: &Packet, + pkt: MblkPacketDataView, ) -> GenDescResult { - let meta = pkt.meta(); - let echo_ident = match T::MESSAGE_PROTOCOL { Protocol::ICMP => { - let icmp = meta.inner_icmp().ok_or(GenIcmpErr::MetaNotFound)?; + let icmp = pkt.inner_icmp().ok_or(GenIcmpErr::MetaNotFound)?; Ok(if icmp.ty() == IcmpV4Type::ECHO_REQUEST { icmp.echo_id() @@ -259,7 +258,7 @@ impl SNat { } Protocol::ICMPv6 => { let icmp6 = - meta.inner_icmp6().ok_or(GenIcmpErr::MetaNotFound)?; + pkt.inner_icmp6().ok_or(GenIcmpErr::MetaNotFound)?; Ok(if icmp6.ty() == IcmpV6Type::ECHO_REQUEST { icmp6.echo_id() @@ -306,7 +305,7 @@ where fn gen_desc( &self, flow_id: &InnerFlowId, - pkt: &Packet, + pkt: MblkPacketDataView, _meta: &mut ActionMeta, ) -> GenDescResult { let proto = flow_id.protocol(); @@ -560,10 +559,11 @@ mod test { // Verify descriptor generation. 
// ================================================================ let flow_out = InnerFlowId::from(pkt.meta()); - let desc = match snat.gen_desc(&flow_out, &pkt, &mut action_meta) { - Ok(AllowOrDeny::Allow(desc)) => desc, - _ => panic!("expected AllowOrDeny::Allow(desc) result"), - }; + let desc = + match snat.gen_desc(&flow_out, pkt.meta_view(), &mut action_meta) { + Ok(AllowOrDeny::Allow(desc)) => desc, + _ => panic!("expected AllowOrDeny::Allow(desc) result"), + }; assert!(!snat.tcp_pool.verify_available(priv_ip, pub_ip, pub_port)); // ================================================================ diff --git a/lib/oxide-vpc/src/engine/gateway/mod.rs b/lib/oxide-vpc/src/engine/gateway/mod.rs index b24e6e50..d79465f6 100644 --- a/lib/oxide-vpc/src/engine/gateway/mod.rs +++ b/lib/oxide-vpc/src/engine/gateway/mod.rs @@ -62,6 +62,7 @@ use opte::engine::layer::Layer; use opte::engine::layer::LayerActions; use opte::engine::packet::InnerFlowId; use opte::engine::packet::MblkPacketData; +use opte::engine::packet::MblkPacketDataView; use opte::engine::port::PortBuilder; use opte::engine::port::Pos; use opte::engine::port::meta::ActionMeta; @@ -154,7 +155,7 @@ impl StaticAction for RewriteSrcMac { &self, _dir: Direction, _flow_id: &InnerFlowId, - _packet_meta: &MblkPacketData, + _packet_meta: MblkPacketDataView, _action_meta: &mut ActionMeta, ) -> GenHtResult { Ok(AllowOrDeny::Allow(HdrTransform { diff --git a/lib/oxide-vpc/src/engine/overlay.rs b/lib/oxide-vpc/src/engine/overlay.rs index 111b05e0..d16c8d78 100644 --- a/lib/oxide-vpc/src/engine/overlay.rs +++ b/lib/oxide-vpc/src/engine/overlay.rs @@ -49,6 +49,7 @@ use opte::engine::layer::Layer; use opte::engine::layer::LayerActions; use opte::engine::packet::InnerFlowId; use opte::engine::packet::MblkPacketData; +use opte::engine::packet::MblkPacketDataView; use opte::engine::port::PortBuilder; use opte::engine::port::Pos; use opte::engine::port::meta::ActionMeta; @@ -201,7 +202,7 @@ impl StaticAction for EncapAction 
{ // The encap action is only used for outgoing. _dir: Direction, flow_id: &InnerFlowId, - _pkt_meta: &MblkPacketData, + _pkt_meta: MblkPacketDataView, action_meta: &mut ActionMeta, ) -> GenHtResult { let f_hash = flow_id.crc32(); @@ -387,7 +388,7 @@ impl StaticAction for DecapAction { // The decap action is only used for inbound. _dir: Direction, _flow_id: &InnerFlowId, - pkt_meta: &MblkPacketData, + pkt_meta: MblkPacketDataView, action_meta: &mut ActionMeta, ) -> GenHtResult { match pkt_meta.outer_encap_geneve_vni_and_origin() { From 0d79c390c17d7ea37d94857f3901b331306b9d2a Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 30 Jun 2025 15:32:35 +0100 Subject: [PATCH 25/37] Cleanup leftover imports etc. --- lib/opte/src/ddi/mblk.rs | 8 ++++---- lib/opte/src/engine/dhcp.rs | 1 - lib/opte/src/engine/dhcpv6/protocol.rs | 1 - lib/opte/src/engine/icmp/v4.rs | 1 - lib/opte/src/engine/icmp/v6.rs | 1 - lib/opte/src/engine/ip/v6.rs | 6 +++--- lib/opte/src/engine/layer.rs | 2 +- lib/opte/src/engine/nat.rs | 3 +-- lib/opte/src/engine/rule.rs | 7 +++---- lib/opte/src/engine/snat.rs | 3 +-- lib/oxide-vpc/src/engine/gateway/mod.rs | 1 - lib/oxide-vpc/src/engine/overlay.rs | 1 - lib/oxide-vpc/tests/integration_tests.rs | 10 +++++----- 13 files changed, 18 insertions(+), 27 deletions(-) diff --git a/lib/opte/src/ddi/mblk.rs b/lib/opte/src/ddi/mblk.rs index 34295102..30a8b579 100644 --- a/lib/opte/src/ddi/mblk.rs +++ b/lib/opte/src/ddi/mblk.rs @@ -1329,7 +1329,7 @@ mod test { _ => panic!("expected failure, accidentally succeeded at parsing"), } - let pkt2 = MsgBlk::copy(&[]); + let pkt2 = MsgBlk::copy([]); assert_eq!(pkt2.len(), 0); assert_eq!(pkt2.seg_len(), 1); assert_eq!(pkt2.tail_capacity(), 16); @@ -1386,9 +1386,9 @@ mod test { #[test] fn truncate() { - let mut p1 = MsgBlk::copy(&[0, 1, 2, 3]); - p1.append(MsgBlk::copy(&[4, 5, 6, 7])); - p1.append(MsgBlk::copy(&[8, 9, 10, 11])); + let mut p1 = MsgBlk::copy([0, 1, 2, 3]); + p1.append(MsgBlk::copy([4, 5, 6, 7])); + 
p1.append(MsgBlk::copy([8, 9, 10, 11])); assert_eq!(p1.seg_len(), 3); assert_eq!(p1.byte_len(), 12); diff --git a/lib/opte/src/engine/dhcp.rs b/lib/opte/src/engine/dhcp.rs index 1ddf1d41..5da86643 100644 --- a/lib/opte/src/engine/dhcp.rs +++ b/lib/opte/src/engine/dhcp.rs @@ -8,7 +8,6 @@ use super::ether::Ethernet; use super::ip::v4::*; -use super::packet::MblkPacketData; use super::packet::MblkPacketDataView; use super::predicate::DataPredicate; use super::predicate::EtherAddrMatch; diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index 27344dd0..627bbeff 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -25,7 +25,6 @@ use crate::engine::dhcpv6::options::StatusCode; use crate::engine::ether::Ethernet; use crate::engine::ip::v6::Ipv6; use crate::engine::ip::v6::Ipv6Ref; -use crate::engine::packet::MblkPacketData; use crate::engine::packet::MblkPacketDataView; use crate::engine::predicate::DataPredicate; use crate::engine::predicate::EtherAddrMatch; diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index 98601757..bee32dbc 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -11,7 +11,6 @@ use crate::ddi::mblk::MsgBlk; use crate::engine::checksum::HeaderChecksum; use crate::engine::ether::Ethernet; use crate::engine::ip::v4::Ipv4; -use crate::engine::packet::MblkPacketData; use crate::engine::packet::MblkPacketDataView; use crate::engine::predicate::Ipv4AddrMatch; use ingot::ethernet::Ethertype; diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index aee80fc9..f523f68b 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -12,7 +12,6 @@ use crate::engine::checksum::HeaderChecksum; use crate::engine::ether::Ethernet; use crate::engine::ip::v6::Ipv6; use crate::engine::ip::v6::Ipv6Ref; -use crate::engine::packet::MblkPacketData; use 
crate::engine::packet::MblkPacketDataView; use crate::engine::predicate::Ipv6AddrMatch; use alloc::string::String; diff --git a/lib/opte/src/engine/ip/v6.rs b/lib/opte/src/engine/ip/v6.rs index 136c7faa..5bd1aec2 100644 --- a/lib/opte/src/engine/ip/v6.rs +++ b/lib/opte/src/engine/ip/v6.rs @@ -441,7 +441,7 @@ pub(crate) mod test { header_end: usize, ) { assert_eq!( - header.packet_length() as usize, + { header.packet_length() }, header_end, "Header length does not include all extension headers" ); @@ -570,10 +570,10 @@ pub(crate) mod test { // Parsing this one will fail -- next header is hop-by-hop, which is // an RFC6564 header -- we don't have (0xc1 * 8) bytes here!! - assert!(ValidIpv6::parse(&buf[..]).is_err()); + assert!(ValidIpv6::parse(buf).is_err()); // We can construct this manually via ingot... - let (v6, _rem) = Accessor::read_from_prefix(&buf[..]).unwrap(); + let (v6, _rem) = Accessor::read_from_prefix(buf).unwrap(); let ip = ValidIpv6(v6, Header::Repr(Default::default())); assert!(ip.validate(120).is_err()); } diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index b9cbf26d..614f2dbe 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -1973,7 +1973,7 @@ mod test { // The pkt/rdr aren't actually used in this case. 
let ameta = ActionMeta::new(); let ifid = *pmeta.flow(); - assert!(rule_table.find_match(&ifid, &pmeta.meta(), &ameta).is_some()); + assert!(rule_table.find_match(&ifid, pmeta.meta(), &ameta).is_some()); } } // TODO Reinstate diff --git a/lib/opte/src/engine/nat.rs b/lib/opte/src/engine/nat.rs index b7b1958a..1142623a 100644 --- a/lib/opte/src/engine/nat.rs +++ b/lib/opte/src/engine/nat.rs @@ -17,9 +17,7 @@ use super::ip::v6::ValidIpv6; use super::packet::BodyTransform; use super::packet::BodyTransformError; use super::packet::InnerFlowId; -use super::packet::MblkFullParsed; use super::packet::MblkPacketDataView; -use super::packet::Packet; use super::parse::Ulp; use super::parse::UlpRepr; use super::port::meta::ActionMeta; @@ -390,6 +388,7 @@ mod test { use super::*; use crate::ddi::mblk::MsgBlk; + use crate::engine::packet::Packet; use crate::engine::GenericUlp; use crate::engine::ether::Ethernet; use crate::engine::ether::EthernetRef; diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 085720c5..654dfcb6 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -29,10 +29,8 @@ use super::ip::v6::Ipv6Mut; use super::ip::v6::v6_set_next_header; use super::packet::BodyTransform; use super::packet::InnerFlowId; -use super::packet::MblkFullParsed; use super::packet::MblkPacketData; use super::packet::MblkPacketDataView; -use super::packet::Packet; use super::packet::PacketData; use super::packet::Pullup; use super::parse::ValidUlp; @@ -1175,6 +1173,7 @@ fn rule_matching() { use ingot::ip::IpProtocol; use ingot::tcp::Tcp; use ingot::types::HeaderLen; + use crate::engine::packet::Packet; let action = Identity::new("rule_matching"); let mut r1 = Rule::new(1, Action::Static(Arc::new(action))); @@ -1213,7 +1212,7 @@ fn rule_matching() { let r1 = r1.finalize(); let ameta = ActionMeta::new(); - assert!(r1.is_match(&meta, &ameta)); + assert!(r1.is_match(meta, &ameta)); let new_src_ip = "10.11.11.99".parse().unwrap(); @@ -1222,5 
+1221,5 @@ fn rule_matching() { v4.set_source(new_src_ip); } - assert!(!r1.is_match(&meta, &ameta)); + assert!(!r1.is_match(meta, &ameta)); } diff --git a/lib/opte/src/engine/snat.rs b/lib/opte/src/engine/snat.rs index 248a39a5..24a59899 100644 --- a/lib/opte/src/engine/snat.rs +++ b/lib/opte/src/engine/snat.rs @@ -12,9 +12,7 @@ use super::headers::UlpGenericModify; use super::headers::UlpHeaderAction; use super::headers::UlpMetaModify; use super::packet::InnerFlowId; -use super::packet::MblkFullParsed; use super::packet::MblkPacketDataView; -use super::packet::Packet; use super::port::meta::ActionMeta; use super::predicate::DataPredicate; use super::predicate::Predicate; @@ -483,6 +481,7 @@ mod test { use crate::engine::ether::EthernetRef; use crate::engine::ip::v4::Ipv4; use crate::engine::ip::v4::Ipv4Ref; + use crate::engine::Packet; use super::*; diff --git a/lib/oxide-vpc/src/engine/gateway/mod.rs b/lib/oxide-vpc/src/engine/gateway/mod.rs index d79465f6..946306cc 100644 --- a/lib/oxide-vpc/src/engine/gateway/mod.rs +++ b/lib/oxide-vpc/src/engine/gateway/mod.rs @@ -61,7 +61,6 @@ use opte::engine::layer::DefaultAction; use opte::engine::layer::Layer; use opte::engine::layer::LayerActions; use opte::engine::packet::InnerFlowId; -use opte::engine::packet::MblkPacketData; use opte::engine::packet::MblkPacketDataView; use opte::engine::port::PortBuilder; use opte::engine::port::Pos; diff --git a/lib/oxide-vpc/src/engine/overlay.rs b/lib/oxide-vpc/src/engine/overlay.rs index d16c8d78..e961eeca 100644 --- a/lib/oxide-vpc/src/engine/overlay.rs +++ b/lib/oxide-vpc/src/engine/overlay.rs @@ -48,7 +48,6 @@ use opte::engine::layer::DefaultAction; use opte::engine::layer::Layer; use opte::engine::layer::LayerActions; use opte::engine::packet::InnerFlowId; -use opte::engine::packet::MblkPacketData; use opte::engine::packet::MblkPacketDataView; use opte::engine::port::PortBuilder; use opte::engine::port::Pos; diff --git a/lib/oxide-vpc/tests/integration_tests.rs 
b/lib/oxide-vpc/tests/integration_tests.rs index 2dec9d1f..31c5a218 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -2135,7 +2135,7 @@ fn test_guest_to_gateway_icmpv6_ping( let msg_type = Icmpv6Message::from(icmp6.ty().0); let msg_code = icmp6.code(); - reply_body.extend(reply.to_full_meta().meta().copy_remaining().into_iter()); + reply_body.extend(reply.to_full_meta().meta().copy_remaining()); let reply_pkt = Icmpv6Packet::new_checked(&reply_body).unwrap(); // Verify the parsed metadata matches the packet @@ -2249,7 +2249,7 @@ fn gateway_router_advert_reply() { let ip6_src = ip6.source(); let ip6_dst = ip6.destination(); - reply_body.extend(reply.to_full_meta().meta().copy_remaining().into_iter()); + reply_body.extend(reply.to_full_meta().meta().copy_remaining()); let reply_pkt = Icmpv6Packet::new_checked(&reply_body).unwrap(); let mut csum = CsumCapab::ignored(); @@ -2489,7 +2489,7 @@ fn validate_hairpin_advert( let ip6_src = ip6.source(); let ip6_dst = ip6.destination(); - reply_body.extend(reply.to_full_meta().meta().copy_remaining().into_iter()); + reply_body.extend(reply.to_full_meta().meta().copy_remaining()); let reply_pkt = Icmpv6Packet::new_checked(&reply_body).unwrap(); // Validate the details of the Neighbor Advertisement itself. 
@@ -4796,7 +4796,7 @@ fn icmp_inner_has_nat_applied() { let mut ip: L3<&mut [u8]> = Ipv4 { source: g1_cfg.ipv4().private_ip, - destination: remote_addr.into(), + destination: remote_addr, protocol: IngotIpProto::ICMP, total_len: (icmp.buffer_len() + Ipv4::MINIMUM_LENGTH) as u16, ..Default::default() @@ -4871,7 +4871,7 @@ fn icmpv6_inner_has_nat_applied() { let ip = Ipv6 { source: remote_addr, - destination: eph_ip.into(), + destination: eph_ip, next_header: IngotIpProto::ICMP_V6, payload_len: icmp.buffer_len() as u16, hop_limit: 64, From fe81fbf6fb989f257662d7b5d08607d92caf8ac8 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Mon, 30 Jun 2025 16:52:33 +0100 Subject: [PATCH 26/37] Some brokwn doclinks. --- lib/opte-test-utils/src/pcap.rs | 2 ++ lib/opte/src/engine/rule.rs | 2 ++ 2 files changed, 4 insertions(+) diff --git a/lib/opte-test-utils/src/pcap.rs b/lib/opte-test-utils/src/pcap.rs index cfe00103..67523d46 100644 --- a/lib/opte-test-utils/src/pcap.rs +++ b/lib/opte-test-utils/src/pcap.rs @@ -37,6 +37,8 @@ fn next_block(offset: &[u8]) -> (&[u8], LegacyPcapBlock) { } /// Build a packet capture file from a series of [`Packet`]. +/// +/// [`Packet`]: opte::engine::packet::Packet pub struct PcapBuilder { file: File, } diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index d7d7aafa..a1dcc9d4 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -796,6 +796,8 @@ pub trait HairpinAction: Display { /// modifications made by previous layers up to this point. /// This also provides access to a reader over the packet body, /// positioned after the parsed metadata. + /// + /// [`Packet`]: super::packet::Packet fn gen_packet(&self, meta: MblkPacketDataView) -> GenPacketResult; /// Return the predicates implicit to this action. 
From 56a126588c3ff40342902a7a505f0edce23b1810 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 1 Jul 2025 13:24:25 +0100 Subject: [PATCH 27/37] Plumb through public `Packet` rather than private `meta` Should be better for being able to check predicate matches etc. in downstream VPC impls. --- lib/opte/src/engine/dhcpv6/protocol.rs | 5 +- lib/opte/src/engine/icmp/v4.rs | 2 +- lib/opte/src/engine/icmp/v6.rs | 12 +- lib/opte/src/engine/layer.rs | 153 ++++---- lib/opte/src/engine/mod.rs | 2 +- lib/opte/src/engine/nat.rs | 38 +- lib/opte/src/engine/packet.rs | 469 ++++++++--------------- lib/opte/src/engine/port/mod.rs | 29 +- lib/opte/src/engine/predicate.rs | 57 +-- lib/opte/src/engine/rule.rs | 22 +- lib/opte/src/engine/snat.rs | 57 ++- lib/opte/src/engine/stat.rs | 36 +- lib/oxide-vpc/src/engine/mod.rs | 2 +- lib/oxide-vpc/src/engine/overlay.rs | 2 +- lib/oxide-vpc/tests/integration_tests.rs | 151 +++++--- xde/src/xde.rs | 8 +- 16 files changed, 462 insertions(+), 583 deletions(-) diff --git a/lib/opte/src/engine/dhcpv6/protocol.rs b/lib/opte/src/engine/dhcpv6/protocol.rs index 0f0b01e7..1c4af4db 100644 --- a/lib/opte/src/engine/dhcpv6/protocol.rs +++ b/lib/opte/src/engine/dhcpv6/protocol.rs @@ -620,7 +620,7 @@ fn generate_packet<'a>( source: Ipv6Addr::from_eui64(&action.server_mac), // Safety: We're only here if the predicates match, one of which is // IPv6. 
- destination: meta.inner_ip6().unwrap().source(), + destination: meta.headers.inner_ip6().unwrap().source(), next_header: IngotIpProto::UDP, payload_len: udp.length, ..Default::default() @@ -725,13 +725,12 @@ mod test { let pkt = Packet::parse_outbound(pkt.iter_mut(), GenericUlp {}) .unwrap() .to_full_meta(); - let pmeta = pkt.meta(); let ameta = ActionMeta::new(); let client_mac = MacAddr::from_const([0xa8, 0x40, 0x25, 0xfa, 0xdd, 0x0b]); for pred in dhcpv6_server_predicates(&client_mac) { assert!( - pred.is_match(pmeta, &ameta), + pred.is_match(&pkt, &ameta), "Expected predicate to match snooped Solicit test packet: {pred}", ); } diff --git a/lib/opte/src/engine/icmp/v4.rs b/lib/opte/src/engine/icmp/v4.rs index bee32dbc..1ab0f9c4 100644 --- a/lib/opte/src/engine/icmp/v4.rs +++ b/lib/opte/src/engine/icmp/v4.rs @@ -51,7 +51,7 @@ impl HairpinAction for IcmpEchoReply { } fn gen_packet(&self, meta: MblkPacketDataView) -> GenPacketResult { - let Some(icmp) = meta.inner_icmp() else { + let Some(icmp) = meta.headers.inner_icmp() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMP packet. That // should be impossible, but we avoid panicking given the kernel diff --git a/lib/opte/src/engine/icmp/v6.rs b/lib/opte/src/engine/icmp/v6.rs index f523f68b..eb5d09d1 100644 --- a/lib/opte/src/engine/icmp/v6.rs +++ b/lib/opte/src/engine/icmp/v6.rs @@ -110,7 +110,7 @@ impl HairpinAction for Icmpv6EchoReply { } fn gen_packet(&self, meta: MblkPacketDataView) -> GenPacketResult { - let Some(icmp6) = meta.inner_icmp6() else { + let Some(icmp6) = meta.headers.inner_icmp6() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMPv6 packet. 
That // should be impossible, but we avoid panicking given the kernel @@ -238,7 +238,7 @@ impl HairpinAction for RouterAdvertisement { use smoltcp::time::Duration; use smoltcp::wire::NdiscRouterFlags; - let Some(icmp6) = meta.inner_icmp6() else { + let Some(icmp6) = meta.headers.inner_icmp6() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMPv6 packet. That // should be impossible, but we avoid panicking given the kernel @@ -250,7 +250,7 @@ impl HairpinAction for RouterAdvertisement { // Collect the src / dst IP addresses, which are needed to emit the // resulting ICMPv6 packet using `smoltcp`. - let Some(ip6) = meta.inner_ip6() else { + let Some(ip6) = meta.headers.inner_ip6() else { // We got the ICMPv6 metadata above but no IPv6 somehow? return Err(GenErr::Unexpected(format!( "Expected IPv6 packet metadata, but found: {meta:?}", @@ -350,7 +350,7 @@ impl HairpinAction for RouterAdvertisement { let ip6 = Ipv6 { source: *self.ip(), // Safety: We match on this being Some(_) above, so unwrap is safe. - destination: meta.inner_ip6().unwrap().source(), + destination: meta.headers.inner_ip6().unwrap().source(), next_header: IngotIpProto::ICMP_V6, payload_len: reply_len as u16, @@ -553,7 +553,7 @@ impl HairpinAction for NeighborAdvertisement { } fn gen_packet(&self, meta: MblkPacketDataView) -> GenPacketResult { - let Some(icmp6) = meta.inner_icmp6() else { + let Some(icmp6) = meta.headers.inner_icmp6() else { // Getting here implies the predicate matched, but that the // extracted metadata indicates this isn't an ICMPv6 packet. That // should be impossible, but we avoid panicking given the kernel @@ -564,7 +564,7 @@ impl HairpinAction for NeighborAdvertisement { }; // Sanity check that this is actually in IPv6 packet. - let metadata = meta.inner_ip6().ok_or_else(|| { + let metadata = meta.headers.inner_ip6().ok_or_else(|| { // We got the ICMPv6 metadata above but no IPv6 somehow? 
GenErr::Unexpected(format!( "Expected IPv6 packet metadata, but found: {meta:?}", diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index 614f2dbe..b8c5aefc 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -15,7 +15,6 @@ use super::packet::BodyTransformError; use super::packet::FLOW_ID_DEFAULT; use super::packet::InnerFlowId; use super::packet::MblkFullParsed; -use super::packet::MblkPacketData; use super::packet::Packet; use super::port::PortBuilder; use super::port::Transforms; @@ -850,7 +849,7 @@ impl Layer { ) -> result::Result { let flow_before = *pkt.flow(); self.layer_process_entry_probe(dir, pkt.flow()); - pkt.meta_mut().stats.new_layer(); + pkt.meta_internal_mut().stats.new_layer(); let res = match dir { Direction::Out => self.process_out(ectx, pkt, xforms, ameta), Direction::In => self.process_in(ectx, pkt, xforms, ameta), @@ -891,7 +890,7 @@ impl Layer { }; if let Some(stat) = stat { - pkt.meta_mut().stats.push(stat.into()); + pkt.meta_internal_mut().stats.push(stat.into()); } match action { @@ -914,7 +913,7 @@ impl Layer { pkt.flow(), ); - if let Some(bt) = desc.gen_bt(Direction::In, pkt.meta_view())? { + if let Some(bt) = desc.gen_bt(Direction::In, pkt.meta())? 
{ pkt.body_transform(Direction::In, &*bt)?; xforms.body.push(bt); } @@ -939,7 +938,7 @@ impl Layer { use Direction::In; self.stats.vals.in_lft_miss += 1; - let rule = self.rules_in.find_match(pkt.flow(), pkt.meta(), ameta); + let rule = self.rules_in.find_match(pkt.flow(), pkt, ameta); let (action, stat) = if let Some(rule) = rule { self.stats.vals.in_rule_match += 1; @@ -950,7 +949,7 @@ impl Layer { (self.default_in.into(), Arc::clone(&self.default_in_stat)) }; - pkt.meta_mut().stats.push(stat.into()); + pkt.meta_internal_mut().stats.push(stat.into()); let flow_before = *pkt.flow(); match action { @@ -965,7 +964,8 @@ impl Layer { }); } - let stat = pkt.meta_mut().stats.new_layer_lft(ectx.stats); + let stat = + pkt.meta_internal_mut().stats.new_layer_lft(ectx.stats); // The outbound flow ID mirrors the inbound. Remember, // the "top" of layer represents how the client sees @@ -1004,35 +1004,31 @@ impl Layer { }, Action::Static(action) => { - let ht = match action.gen_ht( - In, - &flow_before, - pkt.meta_view(), - ameta, - ) { - Ok(aord) => match aord { - AllowOrDeny::Allow(ht) => ht, - AllowOrDeny::Deny => { - return Ok(LayerResult::Deny { - name: self.name, - reason: DenyReason::Action, + let ht = + match action.gen_ht(In, &flow_before, pkt.meta(), ameta) { + Ok(aord) => match aord { + AllowOrDeny::Allow(ht) => ht, + AllowOrDeny::Deny => { + return Ok(LayerResult::Deny { + name: self.name, + reason: DenyReason::Action, + }); + } + }, + + Err(e) => { + self.record_gen_ht_failure( + ectx.user_ctx, + In, + pkt.flow(), + &e, + ); + return Err(LayerError::GenHdrTransform { + layer: self.name, + err: e, }); } - }, - - Err(e) => { - self.record_gen_ht_failure( - ectx.user_ctx, - In, - pkt.flow(), - &e, - ); - return Err(LayerError::GenHdrTransform { - layer: self.name, - err: e, - }); - } - }; + }; pkt.hdr_transform(&ht)?; xforms.hdr.push(ht); @@ -1086,8 +1082,7 @@ impl Layer { } let desc = - match action.gen_desc(&flow_before, pkt.meta_view(), ameta) - { + match 
action.gen_desc(&flow_before, pkt.meta(), ameta) { Ok(aord) => match aord { AllowOrDeny::Allow(desc) => desc, @@ -1122,12 +1117,13 @@ impl Layer { pkt.flow(), ); - if let Some(bt) = desc.gen_bt(In, pkt.meta_view())? { + if let Some(bt) = desc.gen_bt(In, pkt.meta())? { pkt.body_transform(In, &*bt)?; xforms.body.push(bt); } - let stat = pkt.meta_mut().stats.new_layer_lft(ectx.stats); + let stat = + pkt.meta_internal_mut().stats.new_layer_lft(ectx.stats); // The outbound flow ID must be calculated _after_ the // header transformation. Remember, the "top" @@ -1148,7 +1144,7 @@ impl Layer { } Action::Hairpin(action) => { - match action.gen_packet(pkt.meta_view()) { + match action.gen_packet(pkt.meta()) { Ok(AllowOrDeny::Allow(pkt)) => { Ok(LayerResult::Hairpin(pkt)) } @@ -1199,7 +1195,7 @@ impl Layer { }; if let Some(stat) = stat { - pkt.meta_mut().stats.push(stat.into()); + pkt.meta_internal_mut().stats.push(stat.into()); } match action { @@ -1222,9 +1218,7 @@ impl Layer { pkt.flow(), ); - if let Some(bt) = - desc.gen_bt(Direction::Out, pkt.meta_view())? - { + if let Some(bt) = desc.gen_bt(Direction::Out, pkt.meta())? 
{ pkt.body_transform(Direction::Out, &*bt)?; xforms.body.push(bt); } @@ -1249,7 +1243,7 @@ impl Layer { use Direction::Out; self.stats.vals.out_lft_miss += 1; - let rule = self.rules_out.find_match(pkt.flow(), pkt.meta(), ameta); + let rule = self.rules_out.find_match(pkt.flow(), pkt, ameta); let (action, stat) = if let Some(rule) = rule { self.stats.vals.out_rule_match += 1; @@ -1260,7 +1254,7 @@ impl Layer { (self.default_out.into(), Arc::clone(&self.default_out_stat)) }; - pkt.meta_mut().stats.push(stat.into()); + pkt.meta_internal_mut().stats.push(stat.into()); let flow_before = *pkt.flow(); match action { @@ -1275,7 +1269,8 @@ impl Layer { }); } - let stat = pkt.meta_mut().stats.new_layer_lft(ectx.stats); + let stat = + pkt.meta_internal_mut().stats.new_layer_lft(ectx.stats); // The inbound flow ID must be calculated _after_ the // header transformation. Remember, the "top" @@ -1321,35 +1316,31 @@ impl Layer { }, Action::Static(action) => { - let ht = match action.gen_ht( - Out, - &flow_before, - pkt.meta_view(), - ameta, - ) { - Ok(aord) => match aord { - AllowOrDeny::Allow(ht) => ht, - AllowOrDeny::Deny => { - return Ok(LayerResult::Deny { - name: self.name, - reason: DenyReason::Action, + let ht = + match action.gen_ht(Out, &flow_before, pkt.meta(), ameta) { + Ok(aord) => match aord { + AllowOrDeny::Allow(ht) => ht, + AllowOrDeny::Deny => { + return Ok(LayerResult::Deny { + name: self.name, + reason: DenyReason::Action, + }); + } + }, + + Err(e) => { + self.record_gen_ht_failure( + ectx.user_ctx, + Out, + pkt.flow(), + &e, + ); + return Err(LayerError::GenHdrTransform { + layer: self.name, + err: e, }); } - }, - - Err(e) => { - self.record_gen_ht_failure( - ectx.user_ctx, - Out, - pkt.flow(), - &e, - ); - return Err(LayerError::GenHdrTransform { - layer: self.name, - err: e, - }); - } - }; + }; pkt.hdr_transform(&ht)?; xforms.hdr.push(ht); @@ -1402,11 +1393,11 @@ impl Layer { }); } - let stat = pkt.meta_mut().stats.new_layer_lft(ectx.stats); + let stat = + 
pkt.meta_internal_mut().stats.new_layer_lft(ectx.stats); let desc = - match action.gen_desc(&flow_before, pkt.meta_view(), ameta) - { + match action.gen_desc(&flow_before, pkt.meta(), ameta) { Ok(aord) => match aord { AllowOrDeny::Allow(desc) => desc, @@ -1440,7 +1431,7 @@ impl Layer { pkt.flow(), ); - if let Some(bt) = desc.gen_bt(Out, pkt.meta_view())? { + if let Some(bt) = desc.gen_bt(Out, pkt.meta())? { pkt.body_transform(Out, &*bt)?; xforms.body.push(bt); } @@ -1465,7 +1456,7 @@ impl Layer { } Action::Hairpin(action) => { - match action.gen_packet(pkt.meta_view()) { + match action.gen_packet(pkt.meta()) { Ok(AllowOrDeny::Allow(pkt)) => { Ok(LayerResult::Hairpin(pkt)) } @@ -1693,11 +1684,11 @@ impl RuleTable { fn find_match( &mut self, ifid: &InnerFlowId, - pmeta: &MblkPacketData, + pkt: &Packet, ameta: &ActionMeta, ) -> Option<&RuleTableEntry> { for rte in self.rules.iter_mut() { - if rte.rule.is_match(pmeta, ameta) { + if rte.rule.is_match(pkt, ameta) { rte.hits += 1; Self::rule_match_probe( self.port_c.as_c_str(), @@ -1973,7 +1964,7 @@ mod test { // The pkt/rdr aren't actually used in this case. let ameta = ActionMeta::new(); let ifid = *pmeta.flow(); - assert!(rule_table.find_match(&ifid, pmeta.meta(), &ameta).is_some()); + assert!(rule_table.find_match(&ifid, &pmeta, &ameta).is_some()); } } // TODO Reinstate diff --git a/lib/opte/src/engine/mod.rs b/lib/opte/src/engine/mod.rs index f9dc366e..13128860 100644 --- a/lib/opte/src/engine/mod.rs +++ b/lib/opte/src/engine/mod.rs @@ -199,7 +199,7 @@ pub struct HdlPktError(pub &'static str); /// handling of the packet at an individual level, instead of /// treating it as a flow. This is useful for packets that do not /// easily map to the flow model. -pub trait NetworkImpl { +pub trait NetworkImpl: Send + Sync { /// The packet parser for this network implementation. 
type Parser: NetworkParser; diff --git a/lib/opte/src/engine/nat.rs b/lib/opte/src/engine/nat.rs index a7c97c79..990160dd 100644 --- a/lib/opte/src/engine/nat.rs +++ b/lib/opte/src/engine/nat.rs @@ -238,7 +238,7 @@ impl ActionDesc for NatDesc { // ICMPv4/v6 traffic can carry frames which they were generated // in response to. We need to also apply our NAT transform to // these. - match (meta.inner_ulp(), self.priv_ip, self.external_ip) { + match (&meta.headers.inner_ulp, self.priv_ip, self.external_ip) { ( Some(Ulp::IcmpV4(_)), IpAddr::Ip4(priv_ip), @@ -457,8 +457,8 @@ mod test { // ================================================================ // Verify descriptor generation. // ================================================================ - let flow_out = InnerFlowId::from(pkt.meta()); - let desc = match nat.gen_desc(&flow_out, pkt.meta_view(), &mut ameta) { + let flow_out = InnerFlowId::from(pkt.headers()); + let desc = match nat.gen_desc(&flow_out, pkt.meta(), &mut ameta) { Ok(AllowOrDeny::Allow(desc)) => desc, _ => panic!("expected AllowOrDeny::Allow(desc) result"), }; @@ -467,26 +467,20 @@ mod test { // Verify outbound header transformation // ================================================================ let out_ht = desc.gen_ht(Direction::Out); - let pmo = pkt.meta_mut(); - out_ht.run(pmo).unwrap(); + out_ht.run(&mut pkt).unwrap(); + let pmo = pkt.headers(); - let ether_meta = pmo.inner_ether(); + let ether_meta = &pmo.inner_eth; assert_eq!(ether_meta.source(), priv_mac); assert_eq!(ether_meta.destination(), dest_mac); - let ip4_meta = match pmo.inner_ip4() { - Some(v) => v, - _ => panic!("expect Ipv4Meta"), - }; + let ip4_meta = pmo.inner_ip4().unwrap(); assert_eq!(ip4_meta.source(), pub_ip); assert_eq!(ip4_meta.destination(), outside_ip); assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let tcp_meta = match pmo.inner_tcp() { - Some(v) => v, - _ => panic!("expect TcpMeta"), - }; + let tcp_meta = pmo.inner_tcp().unwrap(); 
assert_eq!(tcp_meta.source(), priv_port); assert_eq!(tcp_meta.destination(), outside_port); @@ -522,27 +516,21 @@ mod test { .unwrap() .to_full_meta(); - let pmi = pkt.meta_mut(); let in_ht = desc.gen_ht(Direction::In); - in_ht.run(pmi).unwrap(); + in_ht.run(&mut pkt).unwrap(); + let pmi = pkt.headers(); - let ether_meta = pmi.inner_ether(); + let ether_meta = &pmi.inner_eth; assert_eq!(ether_meta.source(), dest_mac); assert_eq!(ether_meta.destination(), priv_mac); - let ip4_meta = match pmi.inner_ip4() { - Some(v) => v, - _ => panic!("expect Ipv4Meta"), - }; + let ip4_meta = pmi.inner_ip4().unwrap(); assert_eq!(ip4_meta.source(), outside_ip); assert_eq!(ip4_meta.destination(), priv_ip); assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let tcp_meta = match pmi.inner_tcp() { - Some(v) => v, - _ => panic!("expect TcpMeta"), - }; + let tcp_meta = pmi.inner_tcp().unwrap(); assert_eq!(tcp_meta.source(), outside_port); assert_eq!(tcp_meta.destination(), priv_port); diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index af8e6ad3..97e35618 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -293,6 +293,92 @@ pub struct OpteMeta { pub inner_ulp: Option>, } +impl OpteMeta { + /// Returns whether this packet is sourced from outside the rack, + /// in addition to its VNI. 
+ pub fn outer_encap_geneve_vni_and_origin(&self) -> Option<(Vni, bool)> { + match &self.outer_encap { + Some(InlineHeader::Repr(EncapMeta::Geneve(g))) => { + Some((g.vni, g.oxide_external_pkt)) + } + Some(InlineHeader::Raw(ValidEncapMeta::Geneve(_, g))) => { + Some((g.vni(), valid_geneve_has_oxide_external(g))) + } + None => None, + } + } + + pub fn inner_ip4(&self) -> Option<&Ipv4Packet> { + self.inner_l3.as_ref().and_then(|v| match v { + L3::Ipv4(v) => Some(v), + _ => None, + }) + } + + pub fn inner_ip6(&self) -> Option<&Ipv6Packet> { + self.inner_l3.as_ref().and_then(|v| match v { + L3::Ipv6(v) => Some(v), + _ => None, + }) + } + + pub fn inner_icmp(&self) -> Option<&IcmpV4Packet> { + self.inner_ulp.as_ref().and_then(|v| match v { + Ulp::IcmpV4(v) => Some(v), + _ => None, + }) + } + + pub fn inner_icmp6(&self) -> Option<&IcmpV6Packet> { + self.inner_ulp.as_ref().and_then(|v| match v { + Ulp::IcmpV6(v) => Some(v), + _ => None, + }) + } + + pub fn inner_tcp(&self) -> Option<&TcpPacket> { + self.inner_ulp.as_ref().and_then(|v| match v { + Ulp::Tcp(v) => Some(v), + _ => None, + }) + } + + pub fn inner_udp(&self) -> Option<&UdpPacket> { + self.inner_ulp.as_ref().and_then(|v| match v { + Ulp::Udp(v) => Some(v), + _ => None, + }) + } + + pub fn is_inner_tcp(&self) -> bool { + matches!(self.inner_ulp, Some(Ulp::Tcp(_))) + } + + /// Return whether the IP layer has a checksum both structurally + /// and that it is non-zero (i.e., not offloaded). + pub fn has_ip_csum(&self) -> bool { + match &self.inner_l3 { + Some(L3::Ipv4(v4)) => v4.checksum() != 0, + Some(L3::Ipv6(_)) => false, + None => false, + } + } + + /// Return whether the ULP layer has a checksum both structurally + /// and that it is non-zero (i.e., not offloaded). 
+ pub fn has_ulp_csum(&self) -> bool { + let csum = match &self.inner_ulp { + Some(Ulp::Tcp(t)) => t.checksum(), + Some(Ulp::Udp(u)) => u.checksum(), + Some(Ulp::IcmpV4(i4)) => i4.checksum(), + Some(Ulp::IcmpV6(i6)) => i6.checksum(), + None => return false, + }; + + csum != 0 + } +} + /// Helper for conditionally pulling up a packet when required, /// to provide safe read/write access to the packet body. /// @@ -462,7 +548,7 @@ pub struct PacketDataView<'a, T: Read + Pullup> { pub headers: &'a OpteMeta, pub initial_lens: &'a InitialLayerLens, body: &'a PktBodyWalker, - pub stats: &'a mut FlowStatBuilder, + stats: &'a mut FlowStatBuilder, } impl core::fmt::Debug for PacketDataView<'_, T> { @@ -471,97 +557,7 @@ impl core::fmt::Debug for PacketDataView<'_, T> { } } -/// Packet state for the standard ULP path, or a full table walk over the slowpath. -pub struct PacketData { - pub(crate) headers: OpteMeta, - initial_lens: InitialLayerLens, - body: PktBodyWalker, - pub(crate) stats: FlowStatBuilder, -} - impl PacketDataView<'_, T> { - pub fn outer_ether( - &self, - ) -> Option<&InlineHeader>> { - self.headers.outer_eth.as_ref() - } - - pub fn outer_ip(&self) -> Option<&L3> { - self.headers.outer_l3.as_ref() - } - - /// Returns whether this packet is sourced from outside the rack, - /// in addition to its VNI. 
- pub fn outer_encap_geneve_vni_and_origin(&self) -> Option<(Vni, bool)> { - match &self.headers.outer_encap { - Some(InlineHeader::Repr(EncapMeta::Geneve(g))) => { - Some((g.vni, g.oxide_external_pkt)) - } - Some(InlineHeader::Raw(ValidEncapMeta::Geneve(_, g))) => { - Some((g.vni(), valid_geneve_has_oxide_external(g))) - } - None => None, - } - } - - pub fn inner_ether(&self) -> &EthernetPacket { - &self.headers.inner_eth - } - - pub fn inner_l3(&self) -> Option<&L3> { - self.headers.inner_l3.as_ref() - } - - pub fn inner_ulp(&self) -> Option<&Ulp> { - self.headers.inner_ulp.as_ref() - } - - pub fn inner_ip4(&self) -> Option<&Ipv4Packet> { - self.inner_l3().and_then(|v| match v { - L3::Ipv4(v) => Some(v), - _ => None, - }) - } - - pub fn inner_ip6(&self) -> Option<&Ipv6Packet> { - self.inner_l3().and_then(|v| match v { - L3::Ipv6(v) => Some(v), - _ => None, - }) - } - - pub fn inner_icmp(&self) -> Option<&IcmpV4Packet> { - self.inner_ulp().and_then(|v| match v { - Ulp::IcmpV4(v) => Some(v), - _ => None, - }) - } - - pub fn inner_icmp6(&self) -> Option<&IcmpV6Packet> { - self.inner_ulp().and_then(|v| match v { - Ulp::IcmpV6(v) => Some(v), - _ => None, - }) - } - - pub fn inner_tcp(&self) -> Option<&TcpPacket> { - self.inner_ulp().and_then(|v| match v { - Ulp::Tcp(v) => Some(v), - _ => None, - }) - } - - pub fn inner_udp(&self) -> Option<&UdpPacket> { - self.inner_ulp().and_then(|v| match v { - Ulp::Udp(v) => Some(v), - _ => None, - }) - } - - pub fn is_inner_tcp(&self) -> bool { - matches!(self.inner_ulp(), Some(Ulp::Tcp(_))) - } - pub fn body(&self) -> &[u8] where T::Chunk: ByteSliceMut, @@ -588,42 +584,31 @@ impl PacketDataView<'_, T> { buf.extend_from_slice(base); } - /// Return whether the IP layer has a checksum both structurally - /// and that it is non-zero (i.e., not offloaded). 
- pub fn has_ip_csum(&self) -> bool { - match &self.headers.inner_l3 { - Some(L3::Ipv4(v4)) => v4.checksum() != 0, - Some(L3::Ipv6(_)) => false, - None => false, - } - } - - /// Return whether the ULP layer has a checksum both structurally - /// and that it is non-zero (i.e., not offloaded). - pub fn has_ulp_csum(&self) -> bool { - let csum = match &self.headers.inner_ulp { - Some(Ulp::Tcp(t)) => t.checksum(), - Some(Ulp::Udp(u)) => u.checksum(), - Some(Ulp::IcmpV4(i4)) => i4.checksum(), - Some(Ulp::IcmpV6(i6)) => i6.checksum(), - None => return false, - }; - - csum != 0 - } - - /// Push a rootstat - /// TODO::::::::: + /// Push a stat object for this layer of packet processing, in addition to + /// that of the current rule. This allows one rule to be associated with + /// several control-plane level objects, and associate states with each + /// as needed. /// - /// Need to rethink this. This *should* be &mut, but we don't - /// want anything else in here to be mut to protect OPTE's design - /// (i.e., actions don't *actually* modify packets). So we maybe - /// need a view type preventing mut use of the other fields? + /// Dataplane designs should avoid pushing the same root stat in multiple + /// layers -- see the commentary on [`FlowStatBuilder`]. pub fn push_stat(&mut self, stat: Arc) { self.stats.push(stat.into()); } } +/// Packet state for the standard ULP path, or a full table walk over the slowpath. +/// +/// This type should not be used in or handed to OPTE actions, as its fields have +/// different intended levels of mutability when generating an action. For instance, +/// stats can be created and pushed at will, but packet fields/lengths/body contents +/// should be immutable outside of constructed header/body transforms. 
+pub(crate) struct PacketData { + pub(crate) headers: OpteMeta, + pub(crate) initial_lens: InitialLayerLens, + body: PktBodyWalker, + pub(crate) stats: FlowStatBuilder, +} + impl From> for OpteMeta { #[inline] fn from(value: NoEncap) -> Self { @@ -645,7 +630,7 @@ impl core::fmt::Debug for PacketData { } impl PacketData { - pub fn view(&mut self) -> PacketDataView { + pub fn view(&mut self) -> PacketDataView<'_, T> { PacketDataView { headers: &self.headers, initial_lens: &self.initial_lens, @@ -653,160 +638,12 @@ impl PacketData { stats: &mut self.stats, } } - - pub fn prep_body(&mut self) - where - T::Chunk: ByteSliceMut, - T: Pullup, - { - self.body.prepare() - } - - pub fn body(&self) -> &[u8] - where - T::Chunk: ByteSliceMut, - T: Pullup, - { - self.body.body() - } - - pub fn body_mut(&mut self) -> &mut [u8] - where - T::Chunk: ByteSliceMut, - T: Pullup, - { - self.body.body_mut() - } - - pub fn outer_ether( - &self, - ) -> Option<&InlineHeader>> { - self.headers.outer_eth.as_ref() - } - - pub fn outer_ip(&self) -> Option<&L3> { - self.headers.outer_l3.as_ref() - } - - /// Returns whether this packet is sourced from outside the rack, - /// in addition to its VNI. 
- pub fn outer_encap_geneve_vni_and_origin(&self) -> Option<(Vni, bool)> { - match &self.headers.outer_encap { - Some(InlineHeader::Repr(EncapMeta::Geneve(g))) => { - Some((g.vni, g.oxide_external_pkt)) - } - Some(InlineHeader::Raw(ValidEncapMeta::Geneve(_, g))) => { - Some((g.vni(), valid_geneve_has_oxide_external(g))) - } - None => None, - } - } - - pub fn inner_ether(&self) -> &EthernetPacket { - &self.headers.inner_eth - } - - pub fn inner_l3(&self) -> Option<&L3> { - self.headers.inner_l3.as_ref() - } - - pub fn inner_ulp(&self) -> Option<&Ulp> { - self.headers.inner_ulp.as_ref() - } - - pub fn inner_ip4(&self) -> Option<&Ipv4Packet> { - self.inner_l3().and_then(|v| match v { - L3::Ipv4(v) => Some(v), - _ => None, - }) - } - - pub fn inner_ip6(&self) -> Option<&Ipv6Packet> { - self.inner_l3().and_then(|v| match v { - L3::Ipv6(v) => Some(v), - _ => None, - }) - } - - pub fn inner_icmp(&self) -> Option<&IcmpV4Packet> { - self.inner_ulp().and_then(|v| match v { - Ulp::IcmpV4(v) => Some(v), - _ => None, - }) - } - - pub fn inner_icmp6(&self) -> Option<&IcmpV6Packet> { - self.inner_ulp().and_then(|v| match v { - Ulp::IcmpV6(v) => Some(v), - _ => None, - }) - } - - pub fn inner_tcp(&self) -> Option<&TcpPacket> { - self.inner_ulp().and_then(|v| match v { - Ulp::Tcp(v) => Some(v), - _ => None, - }) - } - - pub fn inner_udp(&self) -> Option<&UdpPacket> { - self.inner_ulp().and_then(|v| match v { - Ulp::Udp(v) => Some(v), - _ => None, - }) - } - - pub fn is_inner_tcp(&self) -> bool { - matches!(self.inner_ulp(), Some(Ulp::Tcp(_))) - } - - pub fn copy_remaining(&self) -> Vec - where - T::Chunk: ByteSliceMut, - T: Pullup, - { - let base = self.body(); - base.to_vec() - } - - pub fn append_remaining(&self, buf: &mut Vec) - where - T::Chunk: ByteSliceMut, - T: Pullup, - { - let base = self.body(); - buf.extend_from_slice(base); - } - - /// Return whether the IP layer has a checksum both structurally - /// and that it is non-zero (i.e., not offloaded). 
- pub fn has_ip_csum(&self) -> bool { - match &self.headers.inner_l3 { - Some(L3::Ipv4(v4)) => v4.checksum() != 0, - Some(L3::Ipv6(_)) => false, - None => false, - } - } - - /// Return whether the ULP layer has a checksum both structurally - /// and that it is non-zero (i.e., not offloaded). - pub fn has_ulp_csum(&self) -> bool { - let csum = match &self.headers.inner_ulp { - Some(Ulp::Tcp(t)) => t.checksum(), - Some(Ulp::Udp(u)) => u.checksum(), - Some(Ulp::IcmpV4(i4)) => i4.checksum(), - Some(Ulp::IcmpV6(i6)) => i6.checksum(), - None => return false, - }; - - csum != 0 - } } -impl From<&PacketData> for InnerFlowId { +impl From<&OpteMeta> for InnerFlowId { #[inline] - fn from(meta: &PacketData) -> Self { - let (proto, addrs) = match meta.inner_l3() { + fn from(meta: &OpteMeta) -> Self { + let (proto, addrs) = match &meta.inner_l3 { Some(L3::Ipv4(pkt)) => ( pkt.protocol().0, AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, @@ -818,7 +655,7 @@ impl From<&PacketData> for InnerFlowId { None => (255, FLOW_ID_DEFAULT.addrs), }; - let proto_info = match meta.inner_ulp() { + let proto_info = match &meta.inner_ulp { Some(Ulp::Tcp(t)) => { PortInfo { src_port: t.source(), dst_port: t.destination() } .into() @@ -967,12 +804,12 @@ where } #[inline] - pub fn meta(&self) -> &M { + pub fn headers(&self) -> &M { &self.state.meta.headers } #[inline] - pub fn meta_mut(&mut self) -> &mut M { + pub fn headers_mut(&mut self) -> &mut M { &mut self.state.meta.headers } @@ -988,20 +825,24 @@ where #[inline] pub fn flow(&self) -> InnerFlowId { - self.meta().flow() + self.headers().flow() } } impl Packet> { - pub fn meta_view(&mut self) -> PacketDataView { + pub fn meta(&mut self) -> PacketDataView<'_, T> { self.state.meta.view() } - pub fn meta(&self) -> &PacketData { + pub fn headers(&self) -> &OpteMeta { + &self.state.meta.headers + } + + pub(crate) fn meta_internal(&self) -> &PacketData { &self.state.meta } - pub fn meta_mut(&mut self) -> &mut PacketData { + pub(crate) fn 
meta_internal_mut(&mut self) -> &mut PacketData { &mut self.state.meta } @@ -1242,7 +1083,7 @@ impl Packet> { where T::Chunk: ByteSliceMut, { - self.state.inner_csum_dirty |= xform.run(&mut self.state.meta)?; + self.state.inner_csum_dirty |= xform.run(self)?; // Recomputing this is a little bit wasteful, since we're moving // rebuilding a static repr from packet fields. This is a necessary @@ -1251,7 +1092,7 @@ impl Packet> { // // We *could* elide this on non-compiled UFT transforms, but we do not // need those today. - self.state.flow = InnerFlowId::from(self.meta()); + self.state.flow = InnerFlowId::from(self.headers()); Ok(()) } @@ -1274,7 +1115,7 @@ impl Packet> { self.state.body_modified = true; self.state.meta.body.prepare(); - let ulp = self.state.meta.inner_ulp().map(|v| v.repr()); + let ulp = self.state.meta.headers.inner_ulp.as_ref().map(|v| v.repr()); match self.body_mut() { Some(body_segs) => xform.run(dir, ulp.as_ref(), body_segs), @@ -1291,7 +1132,7 @@ impl Packet> { T::Chunk: ByteSliceMut, T: Pullup, { - let out = self.state.meta.body(); + let out = self.state.meta.body.body(); if out.is_empty() { None } else { Some(out) } } @@ -1301,10 +1142,32 @@ impl Packet> { T::Chunk: ByteSliceMut, T: Pullup, { - let out = self.state.meta.body_mut(); + let out = self.state.meta.body.body_mut(); if out.is_empty() { None } else { Some(out) } } + #[cfg(any(test, feature = "std"))] + pub fn append_remaining(&self, buf: &mut Vec) + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + let base = self.body(); + if let Some(base) = base { + buf.extend_from_slice(base); + } + } + + #[cfg(any(test, feature = "std"))] + pub fn copy_remaining(&self) -> Vec + where + T::Chunk: ByteSliceMut, + T: Pullup, + { + let base = self.body(); + if let Some(base) = base { base.to_vec() } else { vec![] } + } + #[inline] pub fn mblk_addr(&self) -> uintptr_t { self.state.base_ptr @@ -1433,8 +1296,8 @@ impl Packet> { // provided. 
If the checksum is zero, it's assumed heardware // checksum offload is being used, and OPTE should not update // the checksum. - let update_ip = self.state.meta.has_ip_csum(); - let update_ulp = self.state.meta.has_ulp_csum(); + let update_ip = self.state.meta.headers.has_ip_csum(); + let update_ulp = self.state.meta.headers.has_ulp_csum(); // We expect that any body transform will necessarily invalidate // the body_csum. Recompute from scratch. @@ -1588,7 +1451,7 @@ impl> LiteParsed {} // These are needed for now to account for not wanting to redesign // ActionDescs to be generic over T (trait object safety rules, etc.), // in addition to needing to rework Hairpin actions. -pub type MblkPacketData<'a> = PacketData>; +pub(crate) type MblkPacketData<'a> = PacketData>; pub type MblkPacketDataView<'a, 'b> = PacketDataView<'a, MsgBlkIterMut<'b>>; pub type MblkFullParsed<'a> = FullParsed>; pub type MblkLiteParsed<'a, M> = LiteParsed, M>; @@ -1971,17 +1834,19 @@ mod test { .unwrap() .to_full_meta(); - let eth_meta = parsed.meta().inner_ether(); + let headers = parsed.headers(); + + let eth_meta = &headers.inner_eth; assert_eq!(eth_meta.destination(), DST_MAC); assert_eq!(eth_meta.source(), SRC_MAC); assert_eq!(eth_meta.ethertype(), Ethertype::IPV4); - let ip4_meta = parsed.meta().inner_ip4().unwrap(); + let ip4_meta = headers.inner_ip4().unwrap(); assert_eq!(ip4_meta.source(), SRC_IP4); assert_eq!(ip4_meta.destination(), DST_IP4); assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let tcp_meta = parsed.meta().inner_tcp().unwrap(); + let tcp_meta = headers.inner_tcp().unwrap(); assert_eq!(tcp_meta.source(), 3839); assert_eq!(tcp_meta.destination(), 80); assert_eq!(tcp_meta.flags(), TcpFlags::SYN); @@ -2021,17 +1886,17 @@ mod test { .unwrap() .to_full_meta(); - let eth_parsed = pkt.meta().inner_ether(); + let eth_parsed = &pkt.headers().inner_eth; assert_eq!(eth_parsed.destination(), DST_MAC); assert_eq!(eth_parsed.source(), SRC_MAC); assert_eq!(eth_parsed.ethertype(), 
Ethertype::IPV4); - let ip4_parsed = pkt.meta().inner_ip4().unwrap(); + let ip4_parsed = pkt.headers().inner_ip4().unwrap(); assert_eq!(ip4_parsed.source(), SRC_IP4); assert_eq!(ip4_parsed.destination(), DST_IP4); assert_eq!(ip4_parsed.protocol(), IpProtocol::TCP); - let tcp_parsed = pkt.meta().inner_tcp().unwrap(); + let tcp_parsed = pkt.headers().inner_tcp().unwrap(); assert_eq!(tcp_parsed.source(), 3839); assert_eq!(tcp_parsed.destination(), 80); assert_eq!(tcp_parsed.flags(), TcpFlags::SYN); @@ -2114,10 +1979,14 @@ mod test { // Assert that the packet parses back out, and we can reach // the TCP meta no matter which permutation of EHs we have. assert_eq!( - pkt.meta().inner_ip6().unwrap().v6ext_ref().packet_length(), + pkt.headers() + .inner_ip6() + .unwrap() + .v6ext_ref() + .packet_length(), ipv6_header_size - Ipv6::MINIMUM_LENGTH ); - let tcp_meta = pkt.meta().inner_tcp().unwrap(); + let tcp_meta = pkt.headers().inner_tcp().unwrap(); assert_eq!(tcp_meta.source(), 3839); assert_eq!(tcp_meta.destination(), 80); assert_eq!(tcp_meta.sequence(), 4224936861); @@ -2161,8 +2030,8 @@ mod test { .to_full_meta(); // Grab parsed metadata - let ip4_meta = parsed.meta().inner_ip4().unwrap(); - let tcp_meta = parsed.meta().inner_tcp().unwrap(); + let ip4_meta = parsed.headers().inner_ip4().unwrap(); + let tcp_meta = parsed.headers().inner_tcp().unwrap(); // Length in packet headers shouldn't reflect include padding // This should not fail even though there are more bytes in @@ -2220,8 +2089,8 @@ mod test { .to_full_meta(); // Grab parsed metadata - let ip6_meta = pkt.meta().inner_ip6().unwrap(); - let udp_meta = pkt.meta().inner_udp().unwrap(); + let ip6_meta = pkt.headers().inner_ip6().unwrap(); + let udp_meta = pkt.headers().inner_udp().unwrap(); // Length in packet headers shouldn't reflect include padding assert_eq!( diff --git a/lib/opte/src/engine/port/mod.rs b/lib/opte/src/engine/port/mod.rs index bdbc556a..41281bd0 100644 --- a/lib/opte/src/engine/port/mod.rs +++ 
b/lib/opte/src/engine/port/mod.rs @@ -932,7 +932,7 @@ impl Port { if unsafe { super::opte_panic_debug != 0 } { super::err!("mblk: {}", pkt.mblk_addr()); super::err!("flow: {}", pkt.flow()); - super::err!("meta: {:?}", pkt.meta()); + super::err!("meta: {:?}", pkt.meta_internal()); super::err!("flows: {:?}", data); todo!("bad packet: {}", msg); } else { @@ -1415,7 +1415,7 @@ impl Port { tcp_flow.hit_at(process_start); let tcp = pkt - .meta() + .headers() .inner_tcp() .expect("failed to find TCP state on known TCP flow"); @@ -1486,7 +1486,7 @@ impl Port { let tx = entry.state().xforms.compiled.as_ref().cloned().unwrap(); let len = pkt.len(); - let meta = pkt.meta_mut(); + let meta = pkt.headers_mut(); let csum_dirty = tx.checksums_dirty(); let body_csum = @@ -2319,14 +2319,14 @@ impl Port { // ID, therefore we mirror the flow. This value must represent // the guest-side of the flow and thus come from the passed-in // packet metadata that represents the post-processed packet. - let ufid_out = InnerFlowId::from(pmeta).mirror(); + let ufid_out = InnerFlowId::from(&pmeta.headers).mirror(); // Unwrap: We know this is a TCP packet at this point. // // XXX This will be even more foolproof in the future when // we've implemented the notion of FlowSet and Packet is // generic on header group/flow type. - let tcp = pmeta.inner_tcp().unwrap(); + let tcp = pmeta.headers.inner_tcp().unwrap(); let dir = TcpDirection::In { ufid_in, ufid_out: &ufid_out }; @@ -2393,7 +2393,7 @@ impl Port { }; let pkt_len = pkt.len() as u64; - let Some(stat_parents) = pkt.meta_mut().stats.terminate( + let Some(stat_parents) = pkt.meta_internal_mut().stats.terminate( ipr.stat_action(), pkt_len, In, @@ -2449,8 +2449,13 @@ impl Port { // For inbound traffic the TCP flow table must be // checked _after_ processing take place. 
- if pkt.meta().is_inner_tcp() { - match self.process_in_tcp(data, pkt.meta(), ufid_in, pkt_len) { + if pkt.meta_internal().headers.is_inner_tcp() { + match self.process_in_tcp( + data, + pkt.meta_internal(), + ufid_in, + pkt_len, + ) { Ok(TcpMaybeClosed::Closed { .. }) => { Ok(InternalProcessResult::Modified) } @@ -2546,7 +2551,7 @@ impl Port { pmeta: &MblkPacketData, pkt_len: u64, ) -> result::Result { - let tcp = pmeta.inner_tcp().unwrap(); + let tcp = pmeta.headers.inner_tcp().unwrap(); let dir = TcpDirection::Out { ufid_out }; match self.update_tcp_entry(data, tcp, &dir, pkt_len) { @@ -2579,11 +2584,11 @@ impl Port { // For outbound traffic the TCP flow table must be checked // _before_ processing take place. - let tcp_flow = if pkt.meta().is_inner_tcp() { + let tcp_flow = if pkt.meta_internal().headers.is_inner_tcp() { match self.process_out_tcp_new( data, pkt.flow(), - pkt.meta(), + pkt.meta_internal(), pkt_len, ) { Ok(TcpMaybeClosed::Closed { ufid_inbound }) => { @@ -2678,7 +2683,7 @@ impl Port { Err(e) => return Err(ProcessError::Layer(e)), }; - let Some(stat_parents) = pkt.meta_mut().stats.terminate( + let Some(stat_parents) = pkt.meta_internal_mut().stats.terminate( ipr.stat_action(), pkt_len, Out, diff --git a/lib/opte/src/engine/predicate.rs b/lib/opte/src/engine/predicate.rs index 551f2179..07d21621 100644 --- a/lib/opte/src/engine/predicate.rs +++ b/lib/opte/src/engine/predicate.rs @@ -21,7 +21,8 @@ use super::ip::v6::Ipv6Addr; use super::ip::v6::Ipv6Cidr; use super::ip::v6::Ipv6Ref; use super::ip::v6::v6_get_next_header; -use super::packet::MblkPacketData; +use super::packet::MblkFullParsed; +use super::packet::Packet; use super::port::meta::ActionMeta; use alloc::boxed::Box; use alloc::string::String; @@ -389,11 +390,13 @@ impl Display for Predicate { } impl Predicate { - pub(crate) fn is_match( + pub fn is_match( &self, - meta: &MblkPacketData, + pkt: &Packet, action_meta: &ActionMeta, ) -> bool { + let headers = pkt.headers(); + match self { 
Self::Meta(key, pred_val) => { if let Some(meta_val) = action_meta.get(key) { @@ -403,20 +406,20 @@ impl Predicate { return false; } - Self::Not(pred) => return !pred.is_match(meta, action_meta), + Self::Not(pred) => return !pred.is_match(pkt, action_meta), Self::Any(list) => { - return list.iter().any(|v| v.is_match(meta, action_meta)); + return list.iter().any(|v| v.is_match(pkt, action_meta)); } Self::All(list) => { - return list.iter().all(|v| v.is_match(meta, action_meta)); + return list.iter().all(|v| v.is_match(pkt, action_meta)); } Self::InnerEtherType(list) => { for m in list { if m.matches(EtherType::from( - meta.inner_ether().ethertype().0, + headers.inner_eth.ethertype().0, )) { return true; } @@ -425,7 +428,7 @@ impl Predicate { Self::InnerEtherDst(list) => { for m in list { - if m.matches(meta.inner_ether().destination()) { + if m.matches(headers.inner_eth.destination()) { return true; } } @@ -433,13 +436,13 @@ impl Predicate { Self::InnerEtherSrc(list) => { for m in list { - if m.matches(meta.inner_ether().source()) { + if m.matches(headers.inner_eth.source()) { return true; } } } - Self::InnerIpProto(list) => match meta.inner_l3() { + Self::InnerIpProto(list) => match &headers.inner_l3 { None => return false, Some(L3::Ipv4(ipv4)) => { @@ -467,7 +470,7 @@ impl Predicate { } }, - Self::InnerSrcIp4(list) => match meta.inner_ip4() { + Self::InnerSrcIp4(list) => match headers.inner_ip4() { Some(v4) => { let ip = v4.source(); for m in list { @@ -482,7 +485,7 @@ impl Predicate { _ => return false, }, - Self::InnerDstIp4(list) => match meta.inner_ip4() { + Self::InnerDstIp4(list) => match headers.inner_ip4() { Some(v4) => { let ip = v4.destination(); for m in list { @@ -497,7 +500,7 @@ impl Predicate { _ => return false, }, - Self::InnerSrcIp6(list) => match meta.inner_ip6() { + Self::InnerSrcIp6(list) => match headers.inner_ip6() { Some(v6) => { let ip = v6.source(); for m in list { @@ -509,7 +512,7 @@ impl Predicate { _ => return false, }, - 
Self::InnerDstIp6(list) => match meta.inner_ip6() { + Self::InnerDstIp6(list) => match headers.inner_ip6() { Some(v6) => { let ip = v6.destination(); for m in list { @@ -522,7 +525,7 @@ impl Predicate { }, Self::InnerSrcPort(list) => { - match meta.inner_ulp().and_then(|v| v.src_port()) { + match headers.inner_ulp.as_ref().and_then(|v| v.src_port()) { // No ULP metadata or no source port (e.g. ICMPv6). None => return false, @@ -537,7 +540,7 @@ impl Predicate { } Self::InnerDstPort(list) => { - match meta.inner_ulp().and_then(|v| v.dst_port()) { + match headers.inner_ulp.as_ref().and_then(|v| v.dst_port()) { // No ULP metadata or no destination port (e.g. ICMPv6). None => return false, @@ -552,7 +555,7 @@ impl Predicate { } Self::IcmpMsgType(list) => { - let Some(icmp) = meta.inner_icmp() else { + let Some(icmp) = headers.inner_icmp() else { // This isn't an ICMPv4 packet at all return false; }; @@ -565,7 +568,7 @@ impl Predicate { } Self::IcmpMsgCode(list) => { - let Some(icmp) = meta.inner_icmp() else { + let Some(icmp) = headers.inner_icmp() else { // This isn't an ICMPv4 packet at all return false; }; @@ -578,7 +581,7 @@ impl Predicate { } Self::Icmpv6MsgType(list) => { - let Some(icmp6) = meta.inner_icmp6() else { + let Some(icmp6) = headers.inner_icmp6() else { // This isn't an ICMPv6 packet at all return false; }; @@ -591,7 +594,7 @@ impl Predicate { } Self::Icmpv6MsgCode(list) => { - let Some(icmp6) = meta.inner_icmp6() else { + let Some(icmp6) = headers.inner_icmp6() else { // This isn't an ICMPv6 packet at all return false; }; @@ -686,12 +689,12 @@ impl DataPredicate { // use `PacketMeta` to determine if there is a suitable payload to // be inspected. That is, if there is no metadata for a given // header, there is certainly no payload. 
- pub(crate) fn is_match(&self, meta: &MblkPacketData) -> bool { + pub(crate) fn is_match(&self, meta: &Packet) -> bool { match self { Self::Not(pred) => !pred.is_match(meta), Self::DhcpMsgType(mt) => { - let bytes = meta.body(); + let bytes = meta.body().unwrap_or_default(); let pkt = match DhcpPacket::new_checked(&bytes) { Ok(v) => v, @@ -716,17 +719,15 @@ impl DataPredicate { mt.is_match(&DhcpMessageType::from(dhcp.message_type)) } - Self::Dhcpv6MsgType(mt) => { - let body = meta.body(); - if body.is_empty() { + Self::Dhcpv6MsgType(mt) => match meta.body() { + Some(body) => mt.is_match(&body[0].into()), + None => { super::err!( "Failed to read DHCPv6 message type from packet" ); false - } else { - mt.is_match(&body[0].into()) } - } + }, } } } diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index a1dcc9d4..692c7426 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -28,10 +28,11 @@ use super::ip::v4::Ipv4Mut; use super::ip::v6::Ipv6Mut; use super::ip::v6::v6_set_next_header; use super::packet::BodyTransform; +use super::packet::FullParsed; use super::packet::InnerFlowId; -use super::packet::MblkPacketData; +use super::packet::MblkFullParsed; use super::packet::MblkPacketDataView; -use super::packet::PacketData; +use super::packet::Packet; use super::packet::Pullup; use super::parse::ValidUlp; use super::port::meta::ActionMeta; @@ -611,11 +612,13 @@ impl HdrTransform { /// [`HdrTransformError::MissingHeader`] is returned. 
pub fn run( &self, - meta: &mut PacketData, + pkt: &mut Packet>, ) -> Result where T::Chunk: ByteSliceMut, { + let meta = pkt.meta_internal_mut(); + self.outer_ether .act_on_option::>, _>( &mut meta.headers.outer_eth, @@ -1104,7 +1107,7 @@ impl Rule { impl Rule { pub fn is_match( &self, - meta: &MblkPacketData, + pkt: &Packet, action_meta: &ActionMeta, ) -> bool { #[cfg(debug_assertions)] @@ -1126,13 +1129,13 @@ impl Rule { Some(preds) => { for p in &preds.hdr_preds { - if !p.is_match(meta, action_meta) { + if !p.is_match(pkt, action_meta) { return false; } } for p in &preds.data_preds { - if !p.is_match(meta) { + if !p.is_match(pkt) { return false; } } @@ -1207,7 +1210,6 @@ fn rule_matching() { .unwrap() .to_full_meta(); pkt.compute_checksums(); - let meta = pkt.meta(); r1.add_predicate(Predicate::InnerSrcIp4(vec![Ipv4AddrMatch::Exact( src_ip, @@ -1215,14 +1217,14 @@ fn rule_matching() { let r1 = r1.finalize(); let ameta = ActionMeta::new(); - assert!(r1.is_match(meta, &ameta)); + assert!(r1.is_match(&pkt, &ameta)); let new_src_ip = "10.11.11.99".parse().unwrap(); - let meta = pkt.meta_mut(); + let meta = pkt.meta_internal_mut(); if let Some(L3::Ipv4(v4)) = &mut meta.headers.inner_l3 { v4.set_source(new_src_ip); } - assert!(!r1.is_match(meta, &ameta)); + assert!(!r1.is_match(&pkt, &ameta)); } diff --git a/lib/opte/src/engine/snat.rs b/lib/opte/src/engine/snat.rs index 855d4937..84363fde 100644 --- a/lib/opte/src/engine/snat.rs +++ b/lib/opte/src/engine/snat.rs @@ -242,11 +242,14 @@ impl SNat { fn gen_icmp_desc( &self, nat: SNatAlloc, - pkt: MblkPacketDataView, + meta: MblkPacketDataView, ) -> GenDescResult { let echo_ident = match T::MESSAGE_PROTOCOL { Protocol::ICMP => { - let icmp = pkt.inner_icmp().ok_or(GenIcmpErr::MetaNotFound)?; + let icmp = meta + .headers + .inner_icmp() + .ok_or(GenIcmpErr::MetaNotFound)?; Ok(if icmp.ty() == IcmpV4Type::ECHO_REQUEST { icmp.echo_id() @@ -255,8 +258,10 @@ impl SNat { }) } Protocol::ICMPv6 => { - let icmp6 = - 
pkt.inner_icmp6().ok_or(GenIcmpErr::MetaNotFound)?; + let icmp6 = meta + .headers + .inner_icmp6() + .ok_or(GenIcmpErr::MetaNotFound)?; Ok(if icmp6.ty() == IcmpV6Type::ECHO_REQUEST { icmp6.echo_id() @@ -557,38 +562,32 @@ mod test { // ================================================================ // Verify descriptor generation. // ================================================================ - let flow_out = InnerFlowId::from(pkt.meta()); - let desc = - match snat.gen_desc(&flow_out, pkt.meta_view(), &mut action_meta) { - Ok(AllowOrDeny::Allow(desc)) => desc, - _ => panic!("expected AllowOrDeny::Allow(desc) result"), - }; + let flow_out = InnerFlowId::from(pkt.headers()); + let desc = match snat.gen_desc(&flow_out, pkt.meta(), &mut action_meta) + { + Ok(AllowOrDeny::Allow(desc)) => desc, + _ => panic!("expected AllowOrDeny::Allow(desc) result"), + }; assert!(!snat.tcp_pool.verify_available(priv_ip, pub_ip, pub_port)); // ================================================================ // Verify outbound header transformation // ================================================================ let out_ht = desc.gen_ht(Direction::Out); - out_ht.run(pkt.meta_mut()).unwrap(); + out_ht.run(&mut pkt).unwrap(); - let pmo = pkt.meta(); - let ether_meta = pmo.inner_ether(); + let pmo = pkt.headers(); + let ether_meta = &pmo.inner_eth; assert_eq!(ether_meta.source(), priv_mac); assert_eq!(ether_meta.destination(), dest_mac); - let ip4_meta = match pmo.inner_ip4() { - Some(v) => v, - _ => panic!("expect Ipv4Meta"), - }; + let ip4_meta = pmo.inner_ip4().unwrap(); assert_eq!(ip4_meta.source(), pub_ip); assert_eq!(ip4_meta.destination(), outside_ip); assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let tcp_meta = match pmo.inner_tcp() { - Some(v) => v, - _ => panic!("expect TcpMeta"), - }; + let tcp_meta = pmo.inner_tcp().unwrap(); assert_eq!(tcp_meta.source(), pub_port); assert_eq!(tcp_meta.destination(), outside_port); @@ -623,26 +622,20 @@ mod test { 
pkt.compute_checksums(); let in_ht = desc.gen_ht(Direction::In); - in_ht.run(pkt.meta_mut()).unwrap(); + in_ht.run(&mut pkt).unwrap(); - let pmi = pkt.meta(); - let ether_meta = pmi.inner_ether(); + let pmi = pkt.headers(); + let ether_meta = &pmi.inner_eth; assert_eq!(ether_meta.source(), dest_mac); assert_eq!(ether_meta.destination(), priv_mac); - let ip4_meta = match pmi.inner_ip4() { - Some(v) => v, - _ => panic!("expect Ipv4Meta"), - }; + let ip4_meta = pmi.inner_ip4().unwrap(); assert_eq!(ip4_meta.source(), outside_ip); assert_eq!(ip4_meta.destination(), priv_ip); assert_eq!(ip4_meta.protocol(), IpProtocol::TCP); - let tcp_meta = match pmi.inner_tcp() { - Some(v) => v, - _ => panic!("expect TcpMeta"), - }; + let tcp_meta = pmi.inner_tcp().unwrap(); assert_eq!(tcp_meta.source(), outside_port); assert_eq!(tcp_meta.destination(), priv_port); diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index 20f0217a..abadd4e3 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -478,8 +478,6 @@ impl From<&InternalStat> for ApiFullCounter { } /// Manager of all stat/counter objects within a port. 
-/// -/// #[derive(Default)] pub struct StatTree { next_id: u64, @@ -658,25 +656,25 @@ impl StatTree { #[allow(clippy::mutable_key_type)] let mut parents: BTreeSet = Default::default(); let mut base_stats = None; - if let Liveness::Seen(id) = v.lhs { - if let Some(flow) = self.flows.remove(&id) { - let flow = Arc::into_inner(flow) - .expect("strong count 1 is enforced above"); - for p_id in flow.parents { - parents.insert(ById(p_id)); - } - base_stats = Some(flow.shared); + if let Liveness::Seen(id) = v.lhs + && let Some(flow) = self.flows.remove(&id) + { + let flow = Arc::into_inner(flow) + .expect("strong count 1 is enforced above"); + for p_id in flow.parents { + parents.insert(ById(p_id)); } + base_stats = Some(flow.shared); } - if let Liveness::Seen(id) = v.rhs { - if let Some(flow) = self.flows.remove(&id) { - let flow = Arc::into_inner(flow) - .expect("strong count 1 is enforced above"); - for p_id in flow.parents { - parents.insert(ById(p_id)); - } - base_stats = Some(flow.shared); + if let Liveness::Seen(id) = v.rhs + && let Some(flow) = self.flows.remove(&id) + { + let flow = Arc::into_inner(flow) + .expect("strong count 1 is enforced above"); + for p_id in flow.parents { + parents.insert(ById(p_id)); } + base_stats = Some(flow.shared); } // At long last, combine! 
@@ -775,7 +773,7 @@ impl StatTree { stat.parents.iter().map(|v| v.global_id()).collect(); out.push_str(&format!("\t{id}/{} ->\n", stat.dir)); out.push_str(&format!("\t\t{:?} {d:?}\n", stat.shared.stats.id)); - out.push_str(&format!("\t\tparents {:?}\n", parents)); + out.push_str(&format!("\t\tparents {parents:?}\n")); out.push_str(&format!("\t\tbases {:?}\n\n", stat.bases)); } out.push_str("----\n"); diff --git a/lib/oxide-vpc/src/engine/mod.rs b/lib/oxide-vpc/src/engine/mod.rs index c8f6fbf8..d41d1114 100644 --- a/lib/oxide-vpc/src/engine/mod.rs +++ b/lib/oxide-vpc/src/engine/mod.rs @@ -106,7 +106,7 @@ impl NetworkImpl for VpcNetwork { where T::Chunk: ByteSliceMut + IntoBufPointer<'a>, { - match (dir, pkt.meta().inner_ether().ethertype()) { + match (dir, pkt.meta().headers.inner_eth.ethertype()) { (Direction::Out, Ethertype::ARP) => self.handle_arp_out(pkt), _ => Ok(HdlPktAction::Deny), diff --git a/lib/oxide-vpc/src/engine/overlay.rs b/lib/oxide-vpc/src/engine/overlay.rs index e961eeca..f9d26e51 100644 --- a/lib/oxide-vpc/src/engine/overlay.rs +++ b/lib/oxide-vpc/src/engine/overlay.rs @@ -390,7 +390,7 @@ impl StaticAction for DecapAction { pkt_meta: MblkPacketDataView, action_meta: &mut ActionMeta, ) -> GenHtResult { - match pkt_meta.outer_encap_geneve_vni_and_origin() { + match pkt_meta.headers.outer_encap_geneve_vni_and_origin() { Some((vni, oxide_external_pkt)) => { // We only conditionally add this metadata because the // `Address::VNI` filter uses it to select VPC-originated diff --git a/lib/oxide-vpc/tests/integration_tests.rs b/lib/oxide-vpc/tests/integration_tests.rs index c1d6c77f..752280ee 100644 --- a/lib/oxide-vpc/tests/integration_tests.rs +++ b/lib/oxide-vpc/tests/integration_tests.rs @@ -400,18 +400,18 @@ fn gateway_icmp4_ping() { // the VpcParser since it would expect any inbound packet to be // encapsulated. 
pcap.add_pkt(&hp); - // let reply = hp.parse(In, GenericUlp {}).unwrap(); - let reply = parse_inbound(&mut hp, GenericUlp {}).unwrap().to_full_meta(); - let meta = reply.meta(); - assert!(meta.outer_ether().is_none()); - assert!(meta.outer_ip().is_none()); + let mut reply = + parse_inbound(&mut hp, GenericUlp {}).unwrap().to_full_meta(); + let meta = reply.headers(); + assert!(meta.outer_eth.is_none()); + assert!(meta.outer_l3.is_none()); assert!(meta.outer_encap_geneve_vni_and_origin().is_none()); - let eth = meta.inner_ether(); + let eth = &meta.inner_eth; assert_eq!(eth.source(), g1_cfg.gateway_mac); assert_eq!(eth.destination(), g1_cfg.guest_mac); - match meta.inner_l3().as_ref().unwrap() { + match meta.inner_l3.as_ref().unwrap() { L3::Ipv4(ip4) => { assert_eq!(ip4.source(), g1_cfg.ipv4_cfg().unwrap().gateway_ip); assert_eq!( @@ -424,7 +424,8 @@ fn gateway_icmp4_ping() { L3::Ipv6(_) => panic!("expected inner IPv4 metadata, got IPv6"), } - let mut reply_body = meta.inner_ulp().expect("ICMPv4 is a ULP").emit_vec(); + let mut reply_body = + meta.inner_ulp.as_ref().expect("ICMPv4 is a ULP").emit_vec(); reply.meta().append_remaining(&mut reply_body); let reply_pkt = Icmpv4Packet::new_checked(&reply_body).unwrap(); let mut csum = CsumCapab::ignored(); @@ -575,8 +576,8 @@ fn guest_to_guest() { let mut pkt1_m = http_syn(&g1_cfg, &g2_cfg); pcap_guest1.add_pkt(&pkt1_m); let pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); - let ulp_csum_b4 = pkt1.meta().inner_ulp.as_ref().unwrap().csum(); - let ip_csum_b4 = pkt1.meta().inner_l3.as_ref().unwrap().csum(); + let ulp_csum_b4 = pkt1.headers().inner_ulp.as_ref().unwrap().csum(); + let ip_csum_b4 = pkt1.headers().inner_l3.as_ref().unwrap().csum(); // ================================================================ // Run the packet through g1's port in the outbound direction and @@ -599,12 +600,12 @@ fn guest_to_guest() { assert_eq!(nodes.count(), 2); let pkt2 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); - let 
ulp_csum_after = pkt2.meta().inner_ulp.csum(); - let ip_csum_after = pkt2.meta().inner_l3.csum(); + let ulp_csum_after = pkt2.headers().inner_ulp.csum(); + let ip_csum_after = pkt2.headers().inner_l3.csum(); assert_eq!(ulp_csum_after, ulp_csum_b4); assert_eq!(ip_csum_after, ip_csum_b4); - let meta = pkt2.meta(); + let meta = pkt2.headers(); assert_eq!(meta.outer_eth.source(), MacAddr::ZERO); assert_eq!(meta.outer_eth.destination(), MacAddr::ZERO); @@ -668,7 +669,7 @@ fn guest_to_guest() { // assert_eq!(pkt2.body_seg(), 0); let pkt2 = parse_outbound(&mut pkt2_m, VpcParser {}).unwrap(); - let g2_meta = pkt2.meta(); + let g2_meta = pkt2.headers(); // TODO: can we have a convenience method that verifies that the // emitspec was a rewind/drop from the head of the pkt? @@ -813,7 +814,7 @@ fn guest_to_internet_ipv4() { // - Geneve // - (Inner ULP headers) let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); - let meta = pkt1.meta(); + let meta = pkt1.headers(); assert_eq!(meta.outer_eth.source(), MacAddr::ZERO); assert_eq!(meta.outer_eth.destination(), MacAddr::ZERO); @@ -921,7 +922,7 @@ fn guest_to_internet_ipv6() { ); let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); - let meta = pkt1.meta(); + let meta = pkt1.headers(); assert_eq!(meta.outer_eth.source(), MacAddr::ZERO); assert_eq!(meta.outer_eth.destination(), MacAddr::ZERO); @@ -1184,7 +1185,7 @@ fn check_external_ip_inbound_behaviour( .unwrap() .to_full_meta(); assert_eq!( - pkt1.meta().inner_ip4().unwrap().destination(), + pkt1.headers().inner_ip4().unwrap().destination(), private_ip ); } @@ -1197,7 +1198,7 @@ fn check_external_ip_inbound_behaviour( .unwrap() .to_full_meta(); assert_eq!( - pkt1.meta().inner_ip6().unwrap().destination(), + pkt1.headers().inner_ip6().unwrap().destination(), private_ip ); } @@ -1246,12 +1247,14 @@ fn check_external_ip_inbound_behaviour( match ext_ip { IpAddr::Ip4(ip) => { - let chosen_ip = pkt2.meta().inner_ip4().unwrap().source(); + let chosen_ip = + 
pkt2.headers().inner_ip4().unwrap().source(); assert_ne!(chosen_ip, ip); assert_ne!(IpAddr::from(chosen_ip), private_ip); } IpAddr::Ip6(ip) => { - let chosen_ip = pkt2.meta().inner_ip6().unwrap().source(); + let chosen_ip = + pkt2.headers().inner_ip6().unwrap().source(); assert_ne!(chosen_ip, ip); assert_ne!(IpAddr::from(chosen_ip), private_ip); } @@ -1266,10 +1269,16 @@ fn check_external_ip_inbound_behaviour( ); match ext_ip { IpAddr::Ip4(ip) => { - assert_eq!(pkt2.meta().inner_ip4().unwrap().source(), ip); + assert_eq!( + pkt2.headers().inner_ip4().unwrap().source(), + ip + ); } IpAddr::Ip6(ip) => { - assert_eq!(pkt2.meta().inner_ip6().unwrap().source(), ip); + assert_eq!( + pkt2.headers().inner_ip6().unwrap().source(), + ip + ); } }; } @@ -1334,10 +1343,10 @@ fn external_ip_balanced_over_floating_ips() { match partner_ip { IpAddr::Ip4(_) => { - seen_v4s.push(pkt.meta().inner_ip4().unwrap().source()); + seen_v4s.push(pkt.headers().inner_ip4().unwrap().source()); } IpAddr::Ip6(_) => { - seen_v6s.push(pkt.meta().inner_ip6().unwrap().source()); + seen_v6s.push(pkt.headers().inner_ip6().unwrap().source()); } } } @@ -1469,10 +1478,10 @@ fn external_ip_epoch_affinity_preserved() { parse_inbound(&mut pkt2_m, VpcParser {}).unwrap().to_full_meta(); match ext_ip { IpAddr::Ip4(ip) => { - assert_eq!(pkt2.meta().inner_ip4().unwrap().source(), ip); + assert_eq!(pkt2.headers().inner_ip4().unwrap().source(), ip); } IpAddr::Ip6(ip) => { - assert_eq!(pkt2.meta().inner_ip6().unwrap().source(), ip); + assert_eq!(pkt2.headers().inner_ip6().unwrap().source(), ip); } }; } @@ -1567,7 +1576,7 @@ fn unpack_and_verify_icmp( In => parse_outbound(pkt, VpcParser {}).unwrap().to_full_meta(), Out => parse_inbound(pkt, VpcParser {}).unwrap().to_full_meta(), }; - let meta = parsed.meta(); + let meta = parsed.headers(); let (src_eth, dst_eth, src_ip, dst_ip, ident) = match dir { Direction::Out => ( @@ -1586,11 +1595,11 @@ fn unpack_and_verify_icmp( ), }; - let eth = meta.inner_ether(); + let eth = 
&meta.inner_eth; assert_eq!(eth.source(), src_eth); assert_eq!(eth.destination(), dst_eth); - match (dst_ip, meta.inner_l3().as_ref().unwrap()) { + match (dst_ip, meta.inner_l3.as_ref().unwrap()) { (IpAddr::Ip4(_), L3::Ipv4(meta)) => { assert_eq!(eth.ethertype(), Ethertype::IPV4); assert_eq!(IpAddr::from(meta.source()), src_ip); @@ -1629,8 +1638,8 @@ fn unpack_and_verify_icmp4( ) { // Because we treat ICMPv4 as a full-fledged ULP, we need to // unsplit the emitted header from the body. - let mut icmp = pkt.meta().inner_ulp().unwrap().emit_vec(); - pkt.meta().append_remaining(&mut icmp); + let mut icmp = pkt.headers().inner_ulp.as_ref().unwrap().emit_vec(); + pkt.append_remaining(&mut icmp); let icmp = Icmpv4Packet::new_checked(&icmp[..]).unwrap(); @@ -1651,8 +1660,8 @@ fn unpack_and_verify_icmp6( // Because we treat ICMPv4 as a full-fledged ULP, we need to // unsplit the emitted header from the body. - let mut icmp = pkt.meta().inner_ulp().unwrap().emit_vec(); - pkt.meta().append_remaining(&mut icmp); + let mut icmp = pkt.headers().inner_ulp.as_ref().unwrap().emit_vec(); + pkt.append_remaining(&mut icmp); let icmp = Icmpv6Packet::new_checked(&icmp[..]).unwrap(); assert!(icmp.verify_checksum(&src_ip, &dst_ip)); @@ -1976,7 +1985,7 @@ fn arp_gateway() { // can't use the VpcParser since it would expect any // inbound packet to be encapsulated. 
let hppkt = parse_inbound(&mut hppkt, GenericUlp {}).unwrap(); - let meta = hppkt.meta(); + let meta = hppkt.headers(); let ethm = &meta.inner_eth; assert_eq!(ethm.destination(), cfg.guest_mac); assert_eq!(ethm.source(), cfg.gateway_mac); @@ -2107,7 +2116,7 @@ fn test_guest_to_gateway_icmpv6_ping( pcap.add_pkt(&hp); let reply = parse_inbound(&mut hp, GenericUlp {}).unwrap(); - let meta = reply.meta(); + let meta = reply.headers(); let eth = &meta.inner_eth; assert_eq!(eth.source(), g1_cfg.gateway_mac); @@ -2199,7 +2208,7 @@ fn gateway_router_advert_reply() { pcap.add_pkt(&hp); let reply = parse_inbound(&mut hp, GenericUlp {}).unwrap(); - let meta = reply.meta(); + let meta = reply.headers(); let eth = &meta.inner_eth; assert_eq!( @@ -2456,7 +2465,7 @@ fn validate_hairpin_advert( pcap.add_pkt(&hp); let reply = parse_inbound(&mut hp, GenericUlp {}).unwrap(); - let meta = reply.meta(); + let meta = reply.headers(); // Check that the inner MACs are what we expect. let eth = &meta.inner_eth; @@ -2777,10 +2786,10 @@ fn verify_dhcpv6_essentials<'a>( parse_outbound(request_pkt, GenericUlp {}).unwrap().to_full_meta(); let reply_pkt = parse_inbound(reply_pkt, GenericUlp {}).unwrap().to_full_meta(); - let request_meta = request_pkt.meta(); - let reply_meta = reply_pkt.meta(); - let request_ether = request_meta.inner_ether(); - let reply_ether = reply_meta.inner_ether(); + let request_meta = request_pkt.headers(); + let reply_meta = reply_pkt.headers(); + let request_ether = &request_meta.inner_eth; + let reply_ether = &reply_meta.inner_eth; assert_eq!( request_ether.destination(), dhcpv6::ALL_RELAYS_AND_SERVERS.multicast_mac().unwrap() @@ -2906,7 +2915,7 @@ fn test_reply_to_dhcpv6_solicit_or_request() { // inbound packet to be encapsulated. 
pcap.add_pkt(&hp); - let reply_pkt = + let mut reply_pkt = parse_inbound(&mut hp, GenericUlp {}).unwrap().to_full_meta(); let out_body = reply_pkt.meta().copy_remaining(); drop(reply_pkt); @@ -3030,8 +3039,14 @@ fn establish_http_conn( ] ); let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); - let snat_port = - pkt1.to_full_meta().meta().inner_ulp().unwrap().src_port().unwrap(); + let snat_port = pkt1 + .to_full_meta() + .headers() + .inner_ulp + .as_ref() + .unwrap() + .src_port() + .unwrap(); // ================================================================ // Step 2 @@ -3340,8 +3355,14 @@ fn test_outbound_http(g1_cfg: &VpcCfg, g1: &mut PortAndVps) -> InnerFlowId { ] ); let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); - let snat_port = - pkt1.to_full_meta().meta().inner_ulp().unwrap().src_port().unwrap(); + let snat_port = pkt1 + .to_full_meta() + .headers() + .inner_ulp + .as_ref() + .unwrap() + .src_port() + .unwrap(); assert_eq!(TcpState::SynSent, g1.port.tcp_state(&flow).unwrap()); // ================================================================ @@ -3628,8 +3649,14 @@ fn early_tcp_invalidation() { ); assert_eq!(TcpState::SynSent, g1.port.tcp_state(&flow).unwrap()); let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap(); - let snat_port = - pkt1.to_full_meta().meta().inner_ulp().unwrap().src_port().unwrap(); + let snat_port = pkt1 + .to_full_meta() + .headers() + .inner_ulp + .as_ref() + .unwrap() + .src_port() + .unwrap(); // ================================================================ // Drive to established, then validate the same applies to inbound @@ -3821,7 +3848,7 @@ fn ephemeral_ip_preferred_over_snat_outbound() { let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap().to_full_meta(); assert_eq!( - pkt1.meta().inner_ip4().unwrap().source(), + pkt1.headers().inner_ip4().unwrap().source(), "10.60.1.20".parse().unwrap(), "did not choose assigned ephemeral IP" ); @@ -3916,8 +3943,14 @@ fn tcp_inbound() { ); let 
pkt1 = parse_outbound(&mut pkt1_m, VpcParser {}).unwrap(); let flow = pkt1.flow().mirror(); - let sport = - pkt1.to_full_meta().meta().inner_ulp().unwrap().src_port().unwrap(); + let sport = pkt1 + .to_full_meta() + .headers() + .inner_ulp + .as_ref() + .unwrap() + .src_port() + .unwrap(); assert_eq!(TcpState::Listen, g1.port.tcp_state(&flow).unwrap()); // ================================================================ @@ -4405,12 +4438,12 @@ fn port_as_router_target() { // Encap routes between sleds correctly, inner IPs are not modified, // and L2 dst matches the guest's NIC. - let v6_encap_meta = &pkt1.meta().outer_v6; + let v6_encap_meta = &pkt1.headers().outer_v6; assert_eq!(v6_encap_meta.source(), g1_cfg.phys_ip); assert_eq!(v6_encap_meta.destination(), g2_cfg.phys_ip); - assert_eq!(pkt1.meta().inner_eth.destination(), g2_cfg.guest_mac); - assert_eq!(pkt1.meta().inner_eth.source(), g1_cfg.guest_mac); - let ValidL3::Ipv4(inner_ip4) = &pkt1.meta().inner_l3 else { + assert_eq!(pkt1.headers().inner_eth.destination(), g2_cfg.guest_mac); + assert_eq!(pkt1.headers().inner_eth.source(), g1_cfg.guest_mac); + let ValidL3::Ipv4(inner_ip4) = &pkt1.headers().inner_l3 else { panic!("encapped v4 packet did not parse back as v4"); }; assert_eq!(inner_ip4.source(), g1_cfg.ipv4().private_ip); @@ -4636,7 +4669,7 @@ fn select_eip_conditioned_on_igw() { expect_modified!(res, pkt1_m); let pkt1 = parse_inbound(&mut pkt1_m, VpcParser {}).unwrap().to_full_meta(); assert_eq!( - pkt1.meta().inner_ip4().unwrap().source(), + pkt1.headers().inner_ip4().unwrap().source(), g1_cfg.ipv4().external_ips.ephemeral_ip.unwrap() ); incr!( @@ -4666,7 +4699,7 @@ fn select_eip_conditioned_on_igw() { let pkt2 = parse_inbound(&mut pkt2_m, VpcParser {}).unwrap().to_full_meta(); assert!( &g1_cfg.ipv4().external_ips.floating_ips[..2] - .contains(&pkt2.meta().inner_ip4().unwrap().source()) + .contains(&pkt2.headers().inner_ip4().unwrap().source()) ); incr!( g1, @@ -4694,7 +4727,7 @@ fn 
select_eip_conditioned_on_igw() { expect_modified!(res, pkt3_m); let pkt3 = parse_inbound(&mut pkt3_m, VpcParser {}).unwrap().to_full_meta(); assert_eq!( - pkt3.meta().inner_ip4().unwrap().source(), + pkt3.headers().inner_ip4().unwrap().source(), g1_cfg.ipv4().external_ips.floating_ips[2] ); incr!( @@ -4747,7 +4780,7 @@ fn select_eip_conditioned_on_igw() { let pkt5 = parse_inbound(&mut pkt5_m, VpcParser {}).unwrap().to_full_meta(); assert!( &g1_cfg.ipv4().external_ips.floating_ips[..] - .contains(&pkt5.meta().inner_ip4().unwrap().source()) + .contains(&pkt5.headers().inner_ip4().unwrap().source()) ); incr!( g1, diff --git a/xde/src/xde.rs b/xde/src/xde.rs index c2ec46da..a8f26f46 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -1823,7 +1823,7 @@ fn guest_loopback( } }; - let meta = parsed_pkt.meta(); + let meta = parsed_pkt.headers(); let old_len = parsed_pkt.len(); let ulp_meoi = match meta.ulp_meoi(old_len) { @@ -1836,7 +1836,7 @@ fn guest_loopback( let flow = parsed_pkt.flow(); - let ether_dst = parsed_pkt.meta().inner_eth.destination(); + let ether_dst = parsed_pkt.headers().inner_eth.destination(); let port_key = VniMac::new(vni, ether_dst); let maybe_dest_dev = entry_state.get_by_key(port_key); @@ -2024,7 +2024,7 @@ fn xde_mc_tx_one<'a>( }; let old_len = parsed_pkt.len(); - let meta = parsed_pkt.meta(); + let meta = parsed_pkt.headers(); let Ok(non_eth_payl_bytes) = u32::try_from((&meta.inner_l3, &meta.inner_ulp).packet_length()) else { @@ -2495,7 +2495,7 @@ fn xde_rx_one( } }; - let meta = parsed_pkt.meta(); + let meta = parsed_pkt.headers(); let old_len = parsed_pkt.len(); let ulp_meoi = match meta.ulp_meoi(old_len) { From 1040b51346b06e75cda641c7b656b6628e95b7de Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 1 Jul 2025 17:40:00 +0100 Subject: [PATCH 28/37] Some rearrangement (and docs). 
--- lib/opte/src/engine/layer.rs | 6 +- lib/opte/src/engine/packet.rs | 163 +++++++++++++++++++++------------- lib/opte/src/engine/rule.rs | 2 +- lib/opte/src/engine/stat.rs | 68 +++++--------- 4 files changed, 128 insertions(+), 111 deletions(-) diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index b8c5aefc..3c622e9d 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -786,8 +786,8 @@ impl Layer { ft_limit: NonZeroU32, ) -> Self { let stats = port.stats_mut(); - let default_in_stat = stats.root(actions.default_in_stat_id); - let default_out_stat = stats.root(actions.default_out_stat_id); + let default_in_stat = stats.new_root(actions.default_in_stat_id); + let default_out_stat = stats.new_root(actions.default_out_stat_id); let port_name = port.name(); @@ -1656,7 +1656,7 @@ pub enum RuleRemoveErr { impl RuleTable { fn add(&mut self, rule: Rule, stats: &mut StatTree) { - let stat = stats.root(rule.stat_id().copied()); + let stat = stats.new_root(rule.stat_id().copied()); match self.find_pos(&rule) { RulePlace::End => { let rte = diff --git a/lib/opte/src/engine/packet.rs b/lib/opte/src/engine/packet.rs index 97e35618..954a6ef6 100644 --- a/lib/opte/src/engine/packet.rs +++ b/lib/opte/src/engine/packet.rs @@ -379,6 +379,63 @@ impl OpteMeta { } } +impl From> for OpteMeta { + #[inline] + fn from(value: NoEncap) -> Self { + OpteMeta { + outer_eth: None, + outer_l3: None, + outer_encap: None, + inner_eth: value.inner_eth, + inner_l3: value.inner_l3, + inner_ulp: value.inner_ulp, + } + } +} + +impl From<&OpteMeta> for InnerFlowId { + #[inline] + fn from(meta: &OpteMeta) -> Self { + let (proto, addrs) = match &meta.inner_l3 { + Some(L3::Ipv4(pkt)) => ( + pkt.protocol().0, + AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, + ), + Some(L3::Ipv6(pkt)) => ( + pkt.next_layer().unwrap_or_default().0, + AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, + ), + None => (255, FLOW_ID_DEFAULT.addrs), + }; + + 
let proto_info = match &meta.inner_ulp { + Some(Ulp::Tcp(t)) => { + PortInfo { src_port: t.source(), dst_port: t.destination() } + .into() + } + Some(Ulp::Udp(u)) => { + PortInfo { src_port: u.source(), dst_port: u.destination() } + .into() + } + Some(Ulp::IcmpV4(v4)) => IcmpInfo { + ty: v4.ty().0, + code: v4.code(), + id: v4.echo_id().unwrap_or_default(), + } + .into(), + Some(Ulp::IcmpV6(v6)) => IcmpInfo { + ty: v6.ty().0, + code: v6.code(), + id: v6.echo_id().unwrap_or_default(), + } + .into(), + _ => Default::default(), + }; + + InnerFlowId { proto, addrs, proto_info } + } +} + /// Helper for conditionally pulling up a packet when required, /// to provide safe read/write access to the packet body. /// @@ -543,7 +600,12 @@ impl Drop for PktBodyWalker { } } -/// View of [`PacketData`] for use in action bodies. +/// Per-packet context for use within (stateful) actions. +/// +/// This view type provides read-only access to the packet's headers and body, +/// which allow for an action to determine in more detail how a packet should be +/// modified. Additionally, this allows for an action to insert particular +/// [`RootStat`] objects into the packet trace. pub struct PacketDataView<'a, T: Read + Pullup> { pub headers: &'a OpteMeta, pub initial_lens: &'a InitialLayerLens, @@ -558,6 +620,13 @@ impl core::fmt::Debug for PacketDataView<'_, T> { } impl PacketDataView<'_, T> { + /// Examine a packet's body, beginning after the last parsed layer in + /// `headers`. + /// + /// This should be avoided unless required -- if a packet's body is split + /// over several segments or has a shared refcount, then the packet body + /// will be pulled up into a single segment. This cost is paid at most + /// once per packet. pub fn body(&self) -> &[u8] where T::Chunk: ByteSliceMut, @@ -566,6 +635,9 @@ impl PacketDataView<'_, T> { self.body.body() } + /// Copy the packet's body into a new `Vec`. + /// + /// Comes with the same performance caveats as [`Self::body`].
pub fn copy_remaining(&self) -> Vec where T::Chunk: ByteSliceMut, @@ -575,6 +647,9 @@ impl PacketDataView<'_, T> { base.to_vec() } + /// Append the packet's body to an existing `Vec`. + /// + /// Comes with the same performance caveats as [`Self::body`]. pub fn append_remaining(&self, buf: &mut Vec) where T::Chunk: ByteSliceMut, @@ -586,11 +661,34 @@ impl PacketDataView<'_, T> { /// Push a stat object for this layer of packet processing, in addition to /// that of the current rule. This allows one rule to be associated with - /// several control-plane level objects, and associate states with each + /// several control-plane level objects, and to associate states with each /// as needed. /// - /// Dataplane designs should avoid pushing the same root stat in multiple - /// layers -- see the commentary [`FlowStatBuilder`]. + /// ## Ensuring exact counting + /// If an LFT entry is created, all [`RootStat`]s from the current layer are + /// collected and assigned a new internal stat node as a child. + /// + /// For stats to be measured exactly (i.e., without any nondeterministic + /// double/triple-counting) you must ensure that your [`NetworkImpl`] is designed + /// so that each [`RootStat`] you define is only reachable by at most one path + /// in a flow. Duplicate root stats (within a flow or internal node) are + /// trivially filtered out, but reusing a [`RootStat`] in, e.g., a layer which + /// generates an LFT entry and then as the rule-stat in a stateless layer poses + /// problems. + /// + /// I.e., consider the below case: + /// ```text + /// flow(abcd)[ RootStat(0), RootStat(1), InternalStat(2), RootStat(3) ] + /// ^ + /// | + /// [ RootStat(1), RootStat(4), ... ] + /// ``` + /// `InternalStat(2)` could expire at a *later time* than `flow(abcd)`, + /// which means that it and `RootStat(1)` will inherit the flow stats on + /// its closure, and then RootStat(1) will inherit these *again* once + /// `InternalStat(2)` expires.
+ /// + /// [`NetworkImpl`]: super::NetworkImpl pub fn push_stat(&mut self, stat: Arc) { self.stats.push(stat.into()); } @@ -609,20 +707,6 @@ pub(crate) struct PacketData { pub(crate) stats: FlowStatBuilder, } -impl From> for OpteMeta { - #[inline] - fn from(value: NoEncap) -> Self { - OpteMeta { - outer_eth: None, - outer_l3: None, - outer_encap: None, - inner_eth: value.inner_eth, - inner_l3: value.inner_l3, - inner_ulp: value.inner_ulp, - } - } -} - impl core::fmt::Debug for PacketData { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.write_str("PacketHeaders(..)") @@ -640,49 +724,6 @@ impl PacketData { } } -impl From<&OpteMeta> for InnerFlowId { - #[inline] - fn from(meta: &OpteMeta) -> Self { - let (proto, addrs) = match &meta.inner_l3 { - Some(L3::Ipv4(pkt)) => ( - pkt.protocol().0, - AddrPair::V4 { src: pkt.source(), dst: pkt.destination() }, - ), - Some(L3::Ipv6(pkt)) => ( - pkt.next_layer().unwrap_or_default().0, - AddrPair::V6 { src: pkt.source(), dst: pkt.destination() }, - ), - None => (255, FLOW_ID_DEFAULT.addrs), - }; - - let proto_info = match &meta.inner_ulp { - Some(Ulp::Tcp(t)) => { - PortInfo { src_port: t.source(), dst_port: t.destination() } - .into() - } - Some(Ulp::Udp(u)) => { - PortInfo { src_port: u.source(), dst_port: u.destination() } - .into() - } - Some(Ulp::IcmpV4(v4)) => IcmpInfo { - ty: v4.ty().0, - code: v4.code(), - id: v4.echo_id().unwrap_or_default(), - } - .into(), - Some(Ulp::IcmpV6(v6)) => IcmpInfo { - ty: v6.ty().0, - code: v6.code(), - id: v6.echo_id().unwrap_or_default(), - } - .into(), - _ => Default::default(), - }; - - InnerFlowId { proto, addrs, proto_info } - } -} - /// A network packet. /// /// A packet is made up of one or more segments. 
Any given header is diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index 692c7426..cc461eb9 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -601,7 +601,7 @@ impl HdrTransform { } /// Run this header transformation against the passed in - /// [`PacketData`], mutating it in place. + /// [`Packet`], mutating it in place. /// /// Returns whether the inner checksum needs recomputed. /// diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index abadd4e3..4fbd2ec7 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -28,7 +28,6 @@ use opte_api::TcpState; use uuid::Uuid; // TODO READOUT OF STAT FROM GIVEN ROOT(S). -// TODO restrict most of this to pub(crate)? /// Opaque identifier for tracking unique stat objects. #[derive(Copy, Clone, Hash, PartialEq, PartialOrd, Eq, Ord, Debug)] @@ -414,11 +413,11 @@ impl From<&PacketCounter> for ApiPktCounter { } /// Counts of actions taken/packets encountered by a rule. -pub struct FullCounter { - pub allow: AtomicU64, - pub deny: AtomicU64, - pub hairpin: AtomicU64, - pub packets: PacketCounter, +struct FullCounter { + allow: AtomicU64, + deny: AtomicU64, + hairpin: AtomicU64, + packets: PacketCounter, } impl FullCounter { @@ -490,7 +489,7 @@ impl StatTree { /// Gets or creates the root stat for a given UUID. /// /// Allocates a new UUID if none is provided. - pub fn root(&mut self, uuid: Option) -> Arc { + pub fn new_root(&mut self, uuid: Option) -> Arc { let uuid = uuid.unwrap_or_else(|| Uuid::from_u64_pair(0, self.next_id)); let ids = &mut self.next_id; @@ -507,7 +506,7 @@ impl StatTree { } /// Creates a new internal node from a given set of parents. - pub fn new_intermediate( + fn new_intermediate( &mut self, parents: Vec, ) -> Arc { @@ -528,8 +527,8 @@ impl StatTree { out } - /// Gets or creates the flow stat - pub fn new_flow( + /// Gets or creates the flow stat associated with a pair of 5-tuples. 
+ pub(crate) fn new_flow( &mut self, flow_id: &InnerFlowId, partner_flow: &InnerFlowId, @@ -798,30 +797,7 @@ fn get_base_ids(parents: &[StatParent]) -> BTreeSet { /// Collects stats as a packet is processed, keeping track of the boundary /// of the most recent layer. -/// -/// ## Ensuring exact counting -/// For stats to be measured exactly (i.e., without any nondeterministic -/// double/triple-counting) you must ensure that your [`NetworkImpl`] is designed -/// so that each [`RootStat`] you define is only reachable by at most one path -/// from any flow. Duplicate root stats (in a flow or internal node) are -/// trivially filtered out, but reusing a [`RootStat`] in, e.g., a layer which -/// generates an LFT entry and then as the rule-stat in a stateless layer poses -/// problems. -/// -/// I.e., consider the below case: -/// ```text -/// flow(abcd)[ RootStat(0), RootStat(1), InternalNode(2), RootStat(3) ] -/// ^ -/// | -/// [ RootStat(1), RootStat(4), ... ] -/// ``` -/// `InternalNode(2)` could expire at a *later time* than `flow(abcd)`, -/// which means that it and `RootStat(1)` will inherit the flow stats on -/// its closure, and then RootStat(1) will inherit these *again* once -/// `InternalNode(2)` expires. -/// -/// [`NetworkImpl`]: super::NetworkImpl -pub struct FlowStatBuilder { +pub(crate) struct FlowStatBuilder { parents: Vec, layer_end: usize, } @@ -975,10 +951,10 @@ mod tests { // All stats in the last layer instead increment the terminal action. 
let mut tree = StatTree::default(); - let r0 = tree.root(Some(ROOT_0)); - let r1 = tree.root(Some(ROOT_1)); - let r2 = tree.root(Some(ROOT_2)); - let r3 = tree.root(Some(ROOT_3)); + let r0 = tree.new_root(Some(ROOT_0)); + let r1 = tree.new_root(Some(ROOT_1)); + let r2 = tree.new_root(Some(ROOT_2)); + let r3 = tree.new_root(Some(ROOT_3)); let i0 = tree.new_intermediate(vec![r0.into()]); let i1 = tree.new_intermediate(vec![r2.into()]); @@ -1036,10 +1012,10 @@ mod tests { fn flow_lifecycle() { let mut tree = StatTree::default(); - let r0 = tree.root(Some(ROOT_0)); - let r1 = tree.root(Some(ROOT_1)); - let r2 = tree.root(Some(ROOT_2)); - let r3 = tree.root(Some(ROOT_3)); + let r0 = tree.new_root(Some(ROOT_0)); + let r1 = tree.new_root(Some(ROOT_1)); + let r2 = tree.new_root(Some(ROOT_2)); + let r3 = tree.new_root(Some(ROOT_3)); let i0 = tree.new_intermediate(vec![Arc::clone(&r0).into()]); let i1 = tree.new_intermediate(vec![Arc::clone(&r1).into()]); @@ -1169,10 +1145,10 @@ mod tests { fn root_counters() { let mut tree = StatTree::default(); - let r0 = tree.root(Some(ROOT_0)); - let r1 = tree.root(Some(ROOT_1)); - let r2 = tree.root(Some(ROOT_2)); - let r3 = tree.root(Some(ROOT_3)); + let r0 = tree.new_root(Some(ROOT_0)); + let r1 = tree.new_root(Some(ROOT_1)); + let r2 = tree.new_root(Some(ROOT_2)); + let r3 = tree.new_root(Some(ROOT_3)); let i0 = tree.new_intermediate(vec![Arc::clone(&r0).into()]); let i1 = tree.new_intermediate(vec![Arc::clone(&r1).into()]); From 48cf6d6cf8855b73a6fae421dce4b61a83cfd038 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 2 Jul 2025 12:51:07 +0100 Subject: [PATCH 29/37] Start narrowing down `pub`. 
--- crates/opte-api/src/stat.rs | 1 + lib/opte/src/engine/port/mod.rs | 4 ++ lib/opte/src/engine/stat.rs | 110 ++++++++++++++++++-------------- 3 files changed, 66 insertions(+), 49 deletions(-) diff --git a/crates/opte-api/src/stat.rs b/crates/opte-api/src/stat.rs index f7f8afa3..fd30d0be 100644 --- a/crates/opte-api/src/stat.rs +++ b/crates/opte-api/src/stat.rs @@ -16,6 +16,7 @@ use uuid::Uuid; pub struct FlowStat { pub partner: FlowId, pub dir: Direction, + pub first_dir: Direction, pub bases: Vec, pub stats: PacketCounter, } diff --git a/lib/opte/src/engine/port/mod.rs b/lib/opte/src/engine/port/mod.rs index 41281bd0..d6767473 100644 --- a/lib/opte/src/engine/port/mod.rs +++ b/lib/opte/src/engine/port/mod.rs @@ -1556,6 +1556,8 @@ impl Port { .as_mut() .expect("lock should be held on this codepath"); + pkt.meta_internal_mut().stats.reserve(16); + let res = self.process_in_miss( data, epoch, @@ -1574,6 +1576,8 @@ impl Port { .as_mut() .expect("lock should be held on this codepath"); + pkt.meta_internal_mut().stats.reserve(16); + let res = self.process_out_miss(data, epoch, &mut pkt, &mut ameta); diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index 4fbd2ec7..559e3f04 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -31,7 +31,7 @@ use uuid::Uuid; /// Opaque identifier for tracking unique stat objects. #[derive(Copy, Clone, Hash, PartialEq, PartialOrd, Eq, Ord, Debug)] -pub struct StatId(u64); +struct StatId(u64); impl StatId { fn new(val: &mut u64) -> Self { @@ -54,19 +54,19 @@ pub enum Action { /// flow's 5-tuple. pub struct FlowStat { /// The direction of this flow half. - pub dir: Direction, + dir: Direction, /// The other half of this flow. - pub partner: InnerFlowId, + partner: InnerFlowId, /// `TableStat`s to whom we must return our own `stats`. - pub parents: Box<[StatParent]>, + parents: Box<[StatParent]>, /// The cached list of IDs of reachable `RootStat` entries. 
- pub bases: BTreeSet, + bases: BTreeSet, /// Actual stats associated with this flow. - pub shared: Arc, + shared: Arc, /// When was this flow last updated? - pub last_hit: AtomicU64, + last_hit: AtomicU64, } impl FlowStat { @@ -85,17 +85,18 @@ impl FlowStat { /// Packet counters shared by both halves of a flow. Each 5-tuple references /// this struct through a [`FlowStat`]. -pub struct SharedFlowStat { +struct SharedFlowStat { /// Counters associated with this flow. - pub stats: PacketCounter, + stats: PacketCounter, + #[expect(unused)] /// Estimated TCP state from monitoring a flow. /// /// XXX: TODO - pub tcp: Option, + tcp: Option, /// The direction this flow was opened on. - pub first_dir: Direction, + first_dir: Direction, } impl From<&FlowStat> for ApiFlowStat { @@ -103,6 +104,7 @@ impl From<&FlowStat> for ApiFlowStat { ApiFlowStat { partner: value.partner, dir: value.dir, + first_dir: value.shared.first_dir, bases: value.bases.iter().copied().collect(), stats: (&value.shared.stats).into(), } @@ -111,7 +113,7 @@ impl From<&FlowStat> for ApiFlowStat { /// Stat objects which can be a parent to a non-root node. #[derive(Clone, Debug)] -pub enum StatParent { +pub(crate) enum StatParent { Root(Arc), Internal(Arc), } @@ -154,29 +156,20 @@ impl StatParent { } } - /// Allow a packet which will track local stats via a UFT entry. - pub fn allow(&self) { - self.allow_at(Moment::now()); - } - - /// Allow a packet (at a given timestamp) which will track local stats via - /// a UFT entry. - pub fn allow_at(&self, time: Moment) { + /// Allow a packet (at a given timestamp), without recording packet size/counts. + /// + /// This should be used when a flow will track such local stats via a UFT + /// entry. + fn allow_at(&self, time: Moment) { if let Self::Root(r) = self { r.record_hit(time); } self.inner().allow(); } - /// Record an action for a packet which will ultimately be dropped or - /// hairpinned. 
- pub fn act(&self, action: Action, pkt_size: u64, direction: Direction) { - self.act_at(action, pkt_size, direction, Moment::now()); - } - /// Record an action for a packet (at a given time) which will ultimately - /// be dropped or hairpinned. - pub fn act_at( + /// be dropped or hairpinned. E.g., when no UFT will be created for a packet. + fn act_at( &self, action: Action, pkt_size: u64, @@ -190,7 +183,7 @@ impl StatParent { } /// Add a weak child reference to this stat object. - pub fn append_child(&self, child: impl Into) { + fn append_child(&self, child: impl Into) { let mut p_children = self.inner().children.write(); p_children.push(child.into()); } @@ -198,7 +191,7 @@ impl StatParent { /// Stat objects which can be a child to a non-leaf node. #[derive(Clone, Debug)] -pub enum StatChild { +enum StatChild { Internal(Weak), Flow(Weak), } @@ -260,15 +253,18 @@ pub struct RootStat { /// The control-plane ID associated with these counters. pub id: Uuid, /// When was a hit last recorded? - pub last_hit: AtomicU64, + last_hit: AtomicU64, body: TableStat, } impl RootStat { + /// Update the `last_hit` time of this stat. fn record_hit(&self, time: Moment) { self.last_hit.store(time.raw(), Ordering::Relaxed); } + /// Retrieve hit/packet stats reported by this stat object and all of + /// its live children. fn combined_stats(&self) -> ApiFullCounter { let mut visited = BTreeSet::new(); @@ -299,8 +295,8 @@ impl RootStat { /// Temporary counters associated with an LFT entry. #[derive(Debug)] -pub struct InternalStat { - pub parents: Box<[StatParent]>, +pub(crate) struct InternalStat { + parents: Box<[StatParent]>, body: TableStat, } @@ -324,10 +320,16 @@ impl core::fmt::Debug for TableStat { } impl TableStat { + /// Allow a packet (at a given timestamp), without recording packet size/counts. + /// + /// This should be used when a flow will track such local stats via a UFT + /// entry. 
fn allow(&self) { self.stats.allow.fetch_add(1, Ordering::Relaxed); } + /// Record an action for a packet (at a given time) which will ultimately + /// be dropped or hairpinned. E.g., when no UFT will be created for a packet. fn act(&self, action: Action, pkt_size: u64, direction: Direction) { self.stats.packets.hit(direction, pkt_size); match action { @@ -342,14 +344,14 @@ impl TableStat { /// Packet count/byte counters. /// /// Base component of any counter set in OPTE. -pub struct PacketCounter { - pub id: StatId, - pub created_at: Moment, - - pub pkts_in: AtomicU64, - pub bytes_in: AtomicU64, - pub pkts_out: AtomicU64, - pub bytes_out: AtomicU64, +struct PacketCounter { + id: StatId, + created_at: Moment, + + pkts_in: AtomicU64, + bytes_in: AtomicU64, + pkts_out: AtomicU64, + bytes_out: AtomicU64, } impl PacketCounter { @@ -375,6 +377,7 @@ impl PacketCounter { bytes.fetch_add(pkt_size, Ordering::Relaxed); } + /// Increment the values of `into` using all matching counters in `self`. fn combine(&self, into: &Self) { into.pkts_in .fetch_add(self.pkts_in.load(Ordering::Relaxed), Ordering::Relaxed); @@ -392,6 +395,7 @@ impl PacketCounter { ); } + /// Increment the values of `into` using all matching counters in `self`. fn combine_api(&self, into: &mut ApiPktCounter) { into.pkts_in += self.pkts_in.load(Ordering::Relaxed); into.bytes_in += self.bytes_in.load(Ordering::Relaxed); @@ -430,6 +434,7 @@ impl FullCounter { } } + /// Increment the values of `into` using all matching counters in `self`. fn combine(&self, into: &Self) { self.packets.combine(&into.packets); into.allow @@ -440,6 +445,7 @@ impl FullCounter { .fetch_add(self.hairpin.load(Ordering::Relaxed), Ordering::Relaxed); } + /// Increment the values of `into` using all matching counters in `self`. 
fn combine_api(&self, into: &mut ApiFullCounter) { self.packets.combine_api(&mut into.packets); into.allow += self.allow.load(Ordering::Relaxed); @@ -780,7 +786,7 @@ impl StatTree { } } -/// Return the underlying stats of decision-making rules which allowed a flow. +/// Return the underlying stat IDs of decision-making rules which allowed a flow. fn get_base_ids(parents: &[StatParent]) -> BTreeSet { let mut out = BTreeSet::new(); @@ -803,27 +809,33 @@ pub(crate) struct FlowStatBuilder { } impl FlowStatBuilder { - pub fn new() -> Self { + pub(crate) fn new() -> Self { Self { - // TODO: do we want this cfg'able? - parents: Vec::with_capacity(16), + parents: Vec::with_capacity(0), layer_end: 0, } } + pub(crate) fn reserve(&mut self, capacity: usize) { + self.parents.reserve(capacity); + } + /// Push a parent onto this flow. - pub fn push(&mut self, parent: StatParent) { + pub(crate) fn push(&mut self, parent: StatParent) { self.parents.push(parent); } /// Mark all current parents as [`Action::Allow`]. - pub fn new_layer(&mut self) { + pub(crate) fn new_layer(&mut self) { self.layer_end = self.parents.len(); } /// Mark all current parents as [`Action::Allow`], moving them all into /// a new [`InternalStat`]. - pub fn new_layer_lft(&mut self, tree: &mut StatTree) -> Arc { + pub(crate) fn new_layer_lft( + &mut self, + tree: &mut StatTree, + ) -> Arc { let out = tree.new_intermediate(self.parents.split_off(self.layer_end)); self.parents.push(Arc::clone(&out).into()); self.new_layer(); @@ -831,7 +843,7 @@ impl FlowStatBuilder { } /// Return a list of stat parents if this packet is bound for flow creation. 
- pub fn terminate( + pub(crate) fn terminate( &mut self, action: Action, pkt_size: u64, From 643bd3ac992f8dd1eb3e075ce590eae80176743b Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 2 Jul 2025 13:09:51 +0100 Subject: [PATCH 30/37] fmt --- lib/opte/src/engine/stat.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index 559e3f04..73c59863 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -810,10 +810,7 @@ pub(crate) struct FlowStatBuilder { impl FlowStatBuilder { pub(crate) fn new() -> Self { - Self { - parents: Vec::with_capacity(0), - layer_end: 0, - } + Self { parents: Vec::with_capacity(0), layer_end: 0 } } pub(crate) fn reserve(&mut self, capacity: usize) { From f60f8840c22f4281567aa5a0c0e5227453d78d72 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 2 Jul 2025 15:11:44 +0100 Subject: [PATCH 31/37] Flow stats are no longer the domain of TCP --- crates/opte-api/src/cmd.rs | 4 -- lib/opte/src/engine/port/mod.rs | 73 ++++----------------------------- lib/opte/src/print.rs | 11 +---- 3 files changed, 10 insertions(+), 78 deletions(-) diff --git a/crates/opte-api/src/cmd.rs b/crates/opte-api/src/cmd.rs index f9213765..ae20bde5 100644 --- a/crates/opte-api/src/cmd.rs +++ b/crates/opte-api/src/cmd.rs @@ -376,10 +376,6 @@ pub struct TcpFlowEntryDump { pub hits: u64, pub inbound_ufid: Option, pub tcp_state: TcpFlowStateDump, - pub segs_in: u64, - pub segs_out: u64, - pub bytes_in: u64, - pub bytes_out: u64, } #[derive(Debug, Deserialize, Serialize)] diff --git a/lib/opte/src/engine/port/mod.rs b/lib/opte/src/engine/port/mod.rs index d6767473..e0618967 100644 --- a/lib/opte/src/engine/port/mod.rs +++ b/lib/opte/src/engine/port/mod.rs @@ -1428,7 +1428,6 @@ impl Port { self.name_cstr.as_c_str(), tcp, dir, - pkt_len, ufid_in, ) { Ok(TcpState::Closed) => Some(Arc::clone(tcp_flow)), @@ -2174,7 +2173,6 @@ impl Port { tcp_flows: &mut FlowTable, tcp: &impl 
TcpRef, dir: &TcpDirection, - pkt_len: u64, ) -> result::Result { // Create a new entry and find its current state. In // this case it should always be `SynSent`, unless we're @@ -2200,14 +2198,11 @@ impl Port { let (ufid_out, tfes) = match *dir { TcpDirection::In { ufid_in, ufid_out } => ( ufid_out, - TcpFlowEntryState::new_inbound( - *ufid_out, *ufid_in, tfs, pkt_len, - ), - ), - TcpDirection::Out { ufid_out } => ( - ufid_out, - TcpFlowEntryState::new_outbound(*ufid_out, tfs, pkt_len), + TcpFlowEntryState::new_inbound(*ufid_out, *ufid_in, tfs), ), + TcpDirection::Out { ufid_out } => { + (ufid_out, TcpFlowEntryState::new_outbound(*ufid_out, tfs)) + } }; match tcp_flows.add_and_return(*ufid_out, tfes) { Ok(entry) => Ok(TcpMaybeClosed::NewState(tcp_state, entry)), @@ -2248,7 +2243,6 @@ impl Port { data: &mut PortData, tcp: &impl TcpRef, dir: &TcpDirection, - pkt_len: u64, ) -> result::Result { let (ufid_out, ufid_in) = match *dir { TcpDirection::In { ufid_in, ufid_out } => (ufid_out, Some(ufid_in)), @@ -2267,7 +2261,6 @@ impl Port { self.name_cstr.as_c_str(), tcp, dir.dir(), - pkt_len, ufid_in, ); @@ -2317,7 +2310,6 @@ impl Port { data: &mut PortData, pmeta: &MblkPacketData, ufid_in: &InnerFlowId, - pkt_len: u64, ) -> result::Result { // All TCP flows are keyed with respect to the outbound Flow // ID, therefore we mirror the flow. This value must represent @@ -2334,18 +2326,13 @@ impl Port { let dir = TcpDirection::In { ufid_in, ufid_out: &ufid_out }; - match self.update_tcp_entry(data, tcp, &dir, pkt_len) { + match self.update_tcp_entry(data, tcp, &dir) { // We need to create a new TCP entry here because we can't call // `process_in_miss` on the already-modified packet. Err( ProcessError::TcpFlow(TcpFlowStateError::NewFlow { .. 
}) | ProcessError::MissingFlow(_), - ) => self.create_new_tcp_entry( - &mut data.tcp_flows, - tcp, - &dir, - pkt_len, - ), + ) => self.create_new_tcp_entry(&mut data.tcp_flows, tcp, &dir), v => v, } } @@ -2454,12 +2441,7 @@ impl Port { // For inbound traffic the TCP flow table must be // checked _after_ processing take place. if pkt.meta_internal().headers.is_inner_tcp() { - match self.process_in_tcp( - data, - pkt.meta_internal(), - ufid_in, - pkt_len, - ) { + match self.process_in_tcp(data, pkt.meta_internal(), ufid_in) { Ok(TcpMaybeClosed::Closed { .. }) => { Ok(InternalProcessResult::Modified) } @@ -2553,21 +2535,15 @@ impl Port { data: &mut PortData, ufid_out: &InnerFlowId, pmeta: &MblkPacketData, - pkt_len: u64, ) -> result::Result { let tcp = pmeta.headers.inner_tcp().unwrap(); let dir = TcpDirection::Out { ufid_out }; - match self.update_tcp_entry(data, tcp, &dir, pkt_len) { + match self.update_tcp_entry(data, tcp, &dir) { Err( ProcessError::TcpFlow(TcpFlowStateError::NewFlow { .. }) | ProcessError::MissingFlow(_), - ) => self.create_new_tcp_entry( - &mut data.tcp_flows, - tcp, - &dir, - pkt_len, - ), + ) => self.create_new_tcp_entry(&mut data.tcp_flows, tcp, &dir), other => other, } } @@ -2593,7 +2569,6 @@ impl Port { data, pkt.flow(), pkt.meta_internal(), - pkt_len, ) { Ok(TcpMaybeClosed::Closed { ufid_inbound }) => { tcp_closed = true; @@ -2961,10 +2936,6 @@ pub struct TcpFlowEntryStateInner { // the network, not after it's processed. 
inbound_ufid: Option, tcp_state: TcpFlowState, - segs_in: u64, - segs_out: u64, - bytes_in: u64, - bytes_out: u64, } pub struct TcpFlowEntryState { @@ -2976,17 +2947,12 @@ impl TcpFlowEntryState { outbound_ufid: InnerFlowId, inbound_ufid: InnerFlowId, tcp_state: TcpFlowState, - bytes_in: u64, ) -> Self { Self { inner: KMutex::new(TcpFlowEntryStateInner { outbound_ufid, inbound_ufid: Some(inbound_ufid), tcp_state, - segs_in: 1, - segs_out: 0, - bytes_in, - bytes_out: 0, }), } } @@ -2994,17 +2960,12 @@ impl TcpFlowEntryState { fn new_outbound( outbound_ufid: InnerFlowId, tcp_state: TcpFlowState, - bytes_out: u64, ) -> Self { Self { inner: KMutex::new(TcpFlowEntryStateInner { outbound_ufid, inbound_ufid: None, tcp_state, - segs_in: 0, - segs_out: 1, - bytes_in: 0, - bytes_out, }), } } @@ -3020,21 +2981,9 @@ impl TcpFlowEntryState { port_name: &CStr, tcp: &impl TcpRef, dir: Direction, - pkt_len: u64, ufid_in: Option<&InnerFlowId>, ) -> result::Result { let mut tfes = self.inner.lock(); - match dir { - Direction::In => { - tfes.segs_in += 1; - tfes.bytes_in += pkt_len; - } - Direction::Out => { - tfes.segs_out += 1; - tfes.bytes_out += pkt_len; - } - } - if let Some(ufid_in) = ufid_in { // We need to store the UFID of the inbound packet // before it was processed so that we can retire the @@ -3080,10 +3029,6 @@ impl Dump for TcpFlowEntryStateInner { hits, inbound_ufid: self.inbound_ufid, tcp_state: TcpFlowStateDump::from(self.tcp_state), - segs_in: self.segs_in, - segs_out: self.segs_out, - bytes_in: self.bytes_in, - bytes_out: self.bytes_out, } } } diff --git a/lib/opte/src/print.rs b/lib/opte/src/print.rs index bb909341..a9289d96 100644 --- a/lib/opte/src/print.rs +++ b/lib/opte/src/print.rs @@ -283,16 +283,7 @@ fn print_tcp_flow( id: &InnerFlowId, entry: &TcpFlowEntryDump, ) -> std::io::Result<()> { - writeln!( - t, - "{id}\t{}\t{}\t{}\t{}\t{}\t{}", - entry.tcp_state.tcp_state, - entry.hits, - entry.segs_in, - entry.segs_out, - entry.bytes_in, - entry.bytes_out, - ) 
+ writeln!(t, "{id}\t{}\t{}", entry.tcp_state.tcp_state, entry.hits,) } /// Output a horizontal rule in bold to the given writer. From 014d6ed88d618e182b027dc3f02d6eda4ba3deea Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 3 Jul 2025 16:00:38 +0100 Subject: [PATCH 32/37] Make layer in/out processing more consistent --- lib/opte/src/engine/layer.rs | 139 ++++++++++++++++++++--------------- lib/opte/src/engine/rule.rs | 3 +- 2 files changed, 81 insertions(+), 61 deletions(-) diff --git a/lib/opte/src/engine/layer.rs b/lib/opte/src/engine/layer.rs index 3c622e9d..d8c09d83 100644 --- a/lib/opte/src/engine/layer.rs +++ b/lib/opte/src/engine/layer.rs @@ -866,23 +866,23 @@ impl Layer { ameta: &mut ActionMeta, ) -> result::Result { // We have no FlowId, thus there can be no FlowTable entry. - if *pkt.flow() == FLOW_ID_DEFAULT { + if pkt.flow() == &FLOW_ID_DEFAULT { return self.process_in_rules(ectx, pkt, xforms, ameta); } // Do we have a FlowTable entry? If so, use it. - let flow = *pkt.flow(); - let (action, stat) = match self.ft.get_in(&flow) { + let flow = pkt.flow(); + let (action, stat) = match self.ft.get_in(flow) { EntryState::Dirty(ActionDescEntry::Desc(action), stat) if action.is_valid() => { - self.ft.mark_clean(Direction::In, &flow); + self.ft.mark_clean(Direction::In, flow); (Some(ActionDescEntry::Desc(action)), Some(stat)) } EntryState::Dirty(_, _) => { // NoOps are included in this case as we can't ask the actor whether // it remains valid: the simplest method to do so is to rerun lookup. 
- self.ft.remove_in(&flow); + self.ft.remove_in(flow); (None, None) } EntryState::Clean(action, stat) => (Some(action), Some(stat)), @@ -903,6 +903,8 @@ impl Layer { self.stats.vals.in_lft_hit += 1; let flow_before = *pkt.flow(); let ht = desc.gen_ht(Direction::In); + let bt = desc.gen_bt(Direction::In, pkt.meta())?; + pkt.hdr_transform(&ht)?; xforms.hdr.push(ht); ht_probe( @@ -913,7 +915,7 @@ impl Layer { pkt.flow(), ); - if let Some(bt) = desc.gen_bt(Direction::In, pkt.meta())? { + if let Some(bt) = bt { pkt.body_transform(Direction::In, &*bt)?; xforms.body.push(bt); } @@ -971,9 +973,16 @@ impl Layer { // the "top" of layer represents how the client sees // the traffic, and the "bottom" of the layer // represents how the network sees the traffic. - let flow_out = pkt.flow().mirror(); - let desc = ActionDescEntry::NoOp; - self.ft.add_pair(desc, *pkt.flow(), flow_out, stat); + // + // No transformation occurs in a `StatefulAllow`, unlike + // `Stateful(x)`. The mirror flow is computed from the + // initial state. 
+ self.ft.add_pair( + ActionDescEntry::NoOp, + flow_before, + flow_before.mirror(), + stat, + ); self.stats.vals.flows += 1; Ok(LayerResult::Allow) } @@ -981,7 +990,7 @@ impl Layer { Action::Deny => { self.stats.vals.in_deny += 1; let reason = if rule.is_some() { - self.rule_deny_probe(In, pkt.flow()); + self.rule_deny_probe(In, &flow_before); DenyReason::Rule } else { DenyReason::Default @@ -990,18 +999,20 @@ impl Layer { Ok(LayerResult::Deny { name: self.name, reason }) } - Action::Meta(action) => match action.mod_meta(pkt.flow(), ameta) { - Ok(res) => match res { - AllowOrDeny::Allow(_) => Ok(LayerResult::Allow), + Action::Meta(action) => { + match action.mod_meta(&flow_before, ameta) { + Ok(res) => match res { + AllowOrDeny::Allow(_) => Ok(LayerResult::Allow), - AllowOrDeny::Deny => Ok(LayerResult::Deny { - name: self.name, - reason: DenyReason::Action, - }), - }, + AllowOrDeny::Deny => Ok(LayerResult::Deny { + name: self.name, + reason: DenyReason::Action, + }), + }, - Err(msg) => Err(LayerError::ModMeta(msg)), - }, + Err(msg) => Err(LayerError::ModMeta(msg)), + } + } Action::Static(action) => { let ht = @@ -1020,7 +1031,7 @@ impl Layer { self.record_gen_ht_failure( ectx.user_ctx, In, - pkt.flow(), + &flow_before, &e, ); return Err(LayerError::GenHdrTransform { @@ -1098,15 +1109,22 @@ impl Layer { self.record_gen_desc_failure( ectx.user_ctx, In, - pkt.flow(), + &flow_before, &e, ); return Err(LayerError::GenDesc(e)); } }; - let flow_before = *pkt.flow(); + // Generate the transforms, and then roll up our stats into an + // internal node. This allows for correct accounting in the event + // of an error. let ht_in = desc.gen_ht(In); + let bt = desc.gen_bt(In, pkt.meta())?; + + let stat = + pkt.meta_internal_mut().stats.new_layer_lft(ectx.stats); + pkt.hdr_transform(&ht_in)?; xforms.hdr.push(ht_in); ht_probe( @@ -1117,14 +1135,11 @@ impl Layer { pkt.flow(), ); - if let Some(bt) = desc.gen_bt(In, pkt.meta())? 
{ + if let Some(bt) = bt { pkt.body_transform(In, &*bt)?; xforms.body.push(bt); } - let stat = - pkt.meta_internal_mut().stats.new_layer_lft(ectx.stats); - // The outbound flow ID must be calculated _after_ the // header transformation. Remember, the "top" // (outbound) of layer represents how the client sees @@ -1171,23 +1186,23 @@ impl Layer { ameta: &mut ActionMeta, ) -> result::Result { // We have no FlowId, thus there can be no FlowTable entry. - if *pkt.flow() == FLOW_ID_DEFAULT { + if pkt.flow() == &FLOW_ID_DEFAULT { return self.process_out_rules(ectx, pkt, xforms, ameta); } // Do we have a FlowTable entry? If so, use it. - let flow = *pkt.flow(); - let (action, stat) = match self.ft.get_out(&flow) { + let flow = pkt.flow(); + let (action, stat) = match self.ft.get_out(flow) { EntryState::Dirty(ActionDescEntry::Desc(action), stat) if action.is_valid() => { - self.ft.mark_clean(Direction::Out, &flow); + self.ft.mark_clean(Direction::Out, flow); (Some(ActionDescEntry::Desc(action)), Some(stat)) } EntryState::Dirty(_, _) => { // NoOps are included in this case as we can't ask the actor whether // it remains valid: the simplest method to do so is to rerun lookup. - self.ft.remove_out(&flow); + self.ft.remove_out(flow); (None, None) } EntryState::Clean(action, stat) => (Some(action), Some(stat)), @@ -1208,6 +1223,8 @@ impl Layer { self.stats.vals.out_lft_hit += 1; let flow_before = *pkt.flow(); let ht = desc.gen_ht(Direction::Out); + let bt = desc.gen_bt(Direction::Out, pkt.meta())?; + pkt.hdr_transform(&ht)?; xforms.hdr.push(ht); ht_probe( @@ -1218,7 +1235,7 @@ impl Layer { pkt.flow(), ); - if let Some(bt) = desc.gen_bt(Direction::Out, pkt.meta())? { + if let Some(bt) = bt { pkt.body_transform(Direction::Out, &*bt)?; xforms.body.push(bt); } @@ -1272,18 +1289,13 @@ impl Layer { let stat = pkt.meta_internal_mut().stats.new_layer_lft(ectx.stats); - // The inbound flow ID must be calculated _after_ the - // header transformation. 
Remember, the "top" - // (outbound) of layer represents how the client sees - // the traffic, and the "bottom" (inbound) of the - // layer represents how the network sees the traffic. - // The final step is to mirror the IPs and ports to - // reflect the traffic direction change. - let flow_in = pkt.flow().mirror(); + // No transformation occurs in a `StatefulAllow`, unlike + // `Stateful(x)`. The mirror flow is computed from the + // initial state. self.ft.add_pair( ActionDescEntry::NoOp, - flow_in, - *pkt.flow(), + flow_before.mirror(), + flow_before, stat, ); self.stats.vals.flows += 1; @@ -1293,7 +1305,7 @@ impl Layer { Action::Deny => { self.stats.vals.out_deny += 1; let reason = if rule.is_some() { - self.rule_deny_probe(Out, pkt.flow()); + self.rule_deny_probe(Out, &flow_before); DenyReason::Rule } else { DenyReason::Default @@ -1302,18 +1314,20 @@ impl Layer { Ok(LayerResult::Deny { name: self.name, reason }) } - Action::Meta(action) => match action.mod_meta(pkt.flow(), ameta) { - Ok(res) => match res { - AllowOrDeny::Allow(_) => Ok(LayerResult::Allow), + Action::Meta(action) => { + match action.mod_meta(&flow_before, ameta) { + Ok(res) => match res { + AllowOrDeny::Allow(_) => Ok(LayerResult::Allow), - AllowOrDeny::Deny => Ok(LayerResult::Deny { - name: self.name, - reason: DenyReason::Action, - }), - }, + AllowOrDeny::Deny => Ok(LayerResult::Deny { + name: self.name, + reason: DenyReason::Action, + }), + }, - Err(msg) => Err(LayerError::ModMeta(msg)), - }, + Err(msg) => Err(LayerError::ModMeta(msg)), + } + } Action::Static(action) => { let ht = @@ -1332,7 +1346,7 @@ impl Layer { self.record_gen_ht_failure( ectx.user_ctx, Out, - pkt.flow(), + &flow_before, &e, ); return Err(LayerError::GenHdrTransform { @@ -1393,9 +1407,6 @@ impl Layer { }); } - let stat = - pkt.meta_internal_mut().stats.new_layer_lft(ectx.stats); - let desc = match action.gen_desc(&flow_before, pkt.meta(), ameta) { Ok(aord) => match aord { @@ -1413,14 +1424,22 @@ impl Layer { 
self.record_gen_desc_failure( ectx.user_ctx, Out, - pkt.flow(), + &flow_before, &e, ); return Err(LayerError::GenDesc(e)); } }; + // Generate the transforms, and then roll up our stats into an + // internal node. This allows for correct accounting in the event + // of an error. let ht_out = desc.gen_ht(Out); + let bt = desc.gen_bt(Out, pkt.meta())?; + + let stat = + pkt.meta_internal_mut().stats.new_layer_lft(ectx.stats); + pkt.hdr_transform(&ht_out)?; xforms.hdr.push(ht_out); ht_probe( @@ -1431,7 +1450,7 @@ impl Layer { pkt.flow(), ); - if let Some(bt) = desc.gen_bt(Out, pkt.meta())? { + if let Some(bt) = bt { pkt.body_transform(Out, &*bt)?; xforms.body.push(bt); } diff --git a/lib/opte/src/engine/rule.rs b/lib/opte/src/engine/rule.rs index cc461eb9..0a10eee1 100644 --- a/lib/opte/src/engine/rule.rs +++ b/lib/opte/src/engine/rule.rs @@ -175,7 +175,8 @@ pub trait ActionDesc { /// Generate a body transformation. /// /// An action may optionally generate a [`BodyTransform`] in - /// order to act on the body of the packet. + /// order to act on the body of the packet. This function is called + /// *before* the generated [`HdrTransform`] is applied. fn gen_bt( &self, _dir: Direction, From 0398b073c50c79f007df962d2ddab27c87363bf6 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 3 Jul 2025 16:19:38 +0100 Subject: [PATCH 33/37] Errors as a decision (!) 
--- crates/opte-api/src/stat.rs | 1 + lib/opte/src/engine/port/mod.rs | 29 ++++++++++++++++++++++------- lib/opte/src/engine/stat.rs | 14 +++++++++++--- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/crates/opte-api/src/stat.rs b/crates/opte-api/src/stat.rs index fd30d0be..2c71a3b4 100644 --- a/crates/opte-api/src/stat.rs +++ b/crates/opte-api/src/stat.rs @@ -35,5 +35,6 @@ pub struct FullCounter { pub allow: u64, pub deny: u64, pub hairpin: u64, + pub error: u64, pub packets: PacketCounter, } diff --git a/lib/opte/src/engine/port/mod.rs b/lib/opte/src/engine/port/mod.rs index e0618967..e524a536 100644 --- a/lib/opte/src/engine/port/mod.rs +++ b/lib/opte/src/engine/port/mod.rs @@ -2348,6 +2348,8 @@ impl Port { use Direction::In; self.stats.vals.in_uft_miss.incr(1); + let pkt_len = pkt.len() as u64; + let mut xforms = Transforms::new(); let res = self.layers_process(data, In, pkt, &mut xforms, ameta); @@ -2379,11 +2381,17 @@ impl Port { false, ), - // TODO: Errors as a decision?! - Err(e) => return Err(ProcessError::Layer(e)), + Err(e) => { + _ = pkt.meta_internal_mut().stats.terminate( + StatAction::Error, + pkt_len, + In, + false, + ); + return Err(ProcessError::Layer(e)); + } }; - let pkt_len = pkt.len() as u64; let Some(stat_parents) = pkt.meta_internal_mut().stats.terminate( ipr.stat_action(), pkt_len, @@ -2558,10 +2566,10 @@ impl Port { use Direction::Out; self.stats.vals.out_uft_miss.incr(1); - let mut tcp_closed = false; - let pkt_len = pkt.len() as u64; + let mut tcp_closed = false; + // For outbound traffic the TCP flow table must be checked // _before_ processing take place. let tcp_flow = if pkt.meta_internal().headers.is_inner_tcp() { @@ -2658,8 +2666,15 @@ impl Port { false, ), - // TODO: Errors as a decision?! 
- Err(e) => return Err(ProcessError::Layer(e)), + Err(e) => { + _ = pkt.meta_internal_mut().stats.terminate( + StatAction::Error, + pkt_len, + Out, + false, + ); + return Err(ProcessError::Layer(e)); + } }; let Some(stat_parents) = pkt.meta_internal_mut().stats.terminate( diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index 73c59863..518efd10 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -43,16 +43,17 @@ impl StatId { /// Reduced form of an action for stats tracking purposes. #[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Ord, Eq, Hash, Default)] -pub enum Action { +pub(crate) enum Action { #[default] Allow, Deny, Hairpin, + Error, } /// Packet counters and additional information associated with an accepted /// flow's 5-tuple. -pub struct FlowStat { +pub(crate) struct FlowStat { /// The direction of this flow half. dir: Direction, /// The other half of this flow. @@ -336,6 +337,7 @@ impl TableStat { Action::Allow => &self.stats.allow, Action::Deny => &self.stats.deny, Action::Hairpin => &self.stats.hairpin, + Action::Error => &self.stats.error, } .fetch_add(1, Ordering::Relaxed); } @@ -421,6 +423,7 @@ struct FullCounter { allow: AtomicU64, deny: AtomicU64, hairpin: AtomicU64, + error: AtomicU64, packets: PacketCounter, } @@ -430,6 +433,7 @@ impl FullCounter { allow: 0.into(), deny: 0.into(), hairpin: 0.into(), + error: 0.into(), packets: PacketCounter::from_next_id(id), } } @@ -443,6 +447,8 @@ impl FullCounter { .fetch_add(self.deny.load(Ordering::Relaxed), Ordering::Relaxed); into.hairpin .fetch_add(self.hairpin.load(Ordering::Relaxed), Ordering::Relaxed); + into.error + .fetch_add(self.error.load(Ordering::Relaxed), Ordering::Relaxed); } /// Increment the values of `into` using all matching counters in `self`. 
@@ -451,6 +457,7 @@ impl FullCounter { into.allow += self.allow.load(Ordering::Relaxed); into.deny += self.deny.load(Ordering::Relaxed); into.hairpin += self.hairpin.load(Ordering::Relaxed); + into.error += self.error.load(Ordering::Relaxed); } #[inline] @@ -466,6 +473,7 @@ impl From<&FullCounter> for ApiFullCounter { allow: val.allow.load(Ordering::Relaxed), deny: val.deny.load(Ordering::Relaxed), hairpin: val.hairpin.load(Ordering::Relaxed), + error: val.error.load(Ordering::Relaxed), } } } @@ -860,7 +868,7 @@ impl FlowStatBuilder { .for_each(|v| v.act_at(action, pkt_size, direction, now)); None } - Action::Deny | Action::Hairpin => { + Action::Deny | Action::Hairpin | Action::Error => { let (accepted, last_layer) = self.parents.split_at(self.layer_end); accepted.iter().for_each(|v| { From 333a9f0a6c0727f3fa7d9405ef067015deacff2d Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 3 Jul 2025 16:46:51 +0100 Subject: [PATCH 34/37] Record packet delivery class as a tag/stat --- lib/oxide-vpc/src/api/stat.rs | 7 +++ lib/oxide-vpc/src/engine/overlay.rs | 81 ++++++++++++++++------------- 2 files changed, 51 insertions(+), 37 deletions(-) diff --git a/lib/oxide-vpc/src/api/stat.rs b/lib/oxide-vpc/src/api/stat.rs index b88abede..86d85c25 100644 --- a/lib/oxide-vpc/src/api/stat.rs +++ b/lib/oxide-vpc/src/api/stat.rs @@ -31,3 +31,10 @@ pub static NAT_VALID_IGW_V6: Uuid = Uuid::from_fields(0x01de_f00d, 0x7777, 0x0003, &3u64.to_be_bytes()); pub static NAT_NONE: Uuid = Uuid::from_fields(0x01de_f00d, 0x7777, 0x0003, &255u64.to_be_bytes()); + +pub static DESTINATION_INTERNET: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0004, &0u64.to_be_bytes()); +pub static DESTINATION_VPC_LOCAL: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0004, &1u64.to_be_bytes()); +pub static DESTINATION_VPC_PEER: Uuid = + Uuid::from_fields(0x01de_f00d, 0x7777, 0x0004, &2u64.to_be_bytes()); diff --git a/lib/oxide-vpc/src/engine/overlay.rs b/lib/oxide-vpc/src/engine/overlay.rs index 
f9d26e51..18229b8a 100644 --- a/lib/oxide-vpc/src/engine/overlay.rs +++ b/lib/oxide-vpc/src/engine/overlay.rs @@ -15,6 +15,7 @@ use crate::api::PhysNet; use crate::api::TunnelEndpoint; use crate::api::V2bMapResp; use crate::api::VpcMapResp; +use crate::api::stat::*; use crate::cfg::VpcCfg; use alloc::collections::BTreeSet; use alloc::collections::btree_map::BTreeMap; @@ -65,6 +66,7 @@ use opte::engine::rule::Resource; use opte::engine::rule::ResourceEntry; use opte::engine::rule::Rule; use opte::engine::rule::StaticAction; +use opte::engine::stat::RootStat; use poptrie::Poptrie; pub const OVERLAY_LAYER_NAME: &str = "overlay"; @@ -77,12 +79,16 @@ pub fn setup( ft_limit: core::num::NonZeroU32, ) -> core::result::Result<(), OpteError> { // Action Index 0 - let encap = Action::Static(Arc::new(EncapAction::new( - cfg.phys_ip, - cfg.vni, + let internet_stat = pb.stats_mut().new_root(Some(DESTINATION_INTERNET)); + let vpc_local_stat = pb.stats_mut().new_root(Some(DESTINATION_VPC_LOCAL)); + let encap = Action::Static(Arc::new(EncapAction { + phys_ip_src: cfg.phys_ip, + vni: cfg.vni, v2p, v2b, - ))); + internet_stat, + vpc_local_stat, + })); // Action Index 1 let decap = Action::Static(Arc::new(DecapAction::new())); @@ -176,17 +182,9 @@ pub struct EncapAction { vni: Vni, v2p: Arc, v2b: Arc, -} -impl EncapAction { - pub fn new( - phys_ip_src: Ipv6Addr, - vni: Vni, - v2p: Arc, - v2b: Arc, - ) -> Self { - Self { phys_ip_src, vni, v2p, v2b } - } + internet_stat: Arc, + vpc_local_stat: Arc, } impl fmt::Display for EncapAction { @@ -201,7 +199,7 @@ impl StaticAction for EncapAction { // The encap action is only used for outgoing. 
_dir: Direction, flow_id: &InnerFlowId, - _pkt_meta: MblkPacketDataView, + mut pkt: MblkPacketDataView, action_meta: &mut ActionMeta, ) -> GenHtResult { let f_hash = flow_id.crc32(); @@ -235,6 +233,7 @@ impl StaticAction for EncapAction { let (is_internal, phys_target) = match target { RouterTargetInternal::InternetGateway(_) => { + pkt.push_stat(Arc::clone(&self.internet_stat)); match self.v2b.get(&flow_id.dst_ip()) { Some(phys) => { // Hash the packet onto a route target. This is a very @@ -258,30 +257,38 @@ impl StaticAction for EncapAction { } } - RouterTargetInternal::Ip(virt_ip) => match self.v2p.get(&virt_ip) { - Some(phys) => ( - true, - PhysNet { ether: phys.ether, ip: phys.ip, vni: self.vni }, - ), - - // The router target has specified a VPC IP we do not - // currently know about; this could be for two - // reasons: - // - // 1. No such IP currently exists in the guest's VPC. - // - // 2. The destination IP exists in the guest's VPC, - // but we do not yet have a mapping for it. - // - // We cannot differentiate these cases from the point - // of view of this code without more information from - // the control plane; rather we drop the packet. If we - // are dealing with scenario (2), the control plane - // should eventually provide us with a mapping. - None => return Ok(AllowOrDeny::Deny), - }, + RouterTargetInternal::Ip(virt_ip) => { + pkt.push_stat(Arc::clone(&self.vpc_local_stat)); + match self.v2p.get(&virt_ip) { + Some(phys) => ( + true, + PhysNet { + ether: phys.ether, + ip: phys.ip, + vni: self.vni, + }, + ), + + // The router target has specified a VPC IP we do not + // currently know about; this could be for two + // reasons: + // + // 1. No such IP currently exists in the guest's VPC. + // + // 2. The destination IP exists in the guest's VPC, + // but we do not yet have a mapping for it. 
+ // + // We cannot differentiate these cases from the point + // of view of this code without more information from + // the control plane; rather we drop the packet. If we + // are dealing with scenario (2), the control plane + // should eventually provide us with a mapping. + None => return Ok(AllowOrDeny::Deny), + } + } RouterTargetInternal::VpcSubnet(_) => { + pkt.push_stat(Arc::clone(&self.vpc_local_stat)); match self.v2p.get(&flow_id.dst_ip()) { Some(phys) => ( true, From 7dac0244ea299819f8fc25fe40c2674cea561404 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 10 Jul 2025 15:08:52 +0100 Subject: [PATCH 35/37] Ioctls, at last. --- Cargo.lock | 2 + bin/opteadm/Cargo.toml | 1 + bin/opteadm/src/bin/opteadm.rs | 66 ++++++++++++++++-- crates/opte-api/src/cmd.rs | 109 +++++++++++++++++++++++------ lib/opte-ioctl/Cargo.toml | 1 + lib/opte-ioctl/src/lib.rs | 71 +++++++++++++++++-- lib/opte/src/engine/port/mod.rs | 8 ++- lib/opte/src/engine/stat.rs | 38 ++++++++++- lib/oxide-vpc/src/api/mod.rs | 8 --- xde/src/xde.rs | 117 ++++++++++++++++++++++++++------ 10 files changed, 358 insertions(+), 63 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f1cf58e2..22a72cda 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1457,6 +1457,7 @@ dependencies = [ "postcard", "serde", "thiserror 2.0.12", + "uuid", ] [[package]] @@ -1485,6 +1486,7 @@ dependencies = [ "serde", "tabwriter", "thiserror 2.0.12", + "uuid", ] [[package]] diff --git a/bin/opteadm/Cargo.toml b/bin/opteadm/Cargo.toml index c0e5ac19..d11a7fd6 100644 --- a/bin/opteadm/Cargo.toml +++ b/bin/opteadm/Cargo.toml @@ -23,6 +23,7 @@ postcard.workspace = true serde.workspace = true tabwriter.workspace = true thiserror.workspace = true +uuid.workspace = true [build-dependencies] anyhow.workspace = true diff --git a/bin/opteadm/src/bin/opteadm.rs b/bin/opteadm/src/bin/opteadm.rs index 6372a5a5..0dee15f3 100644 --- a/bin/opteadm/src/bin/opteadm.rs +++ b/bin/opteadm/src/bin/opteadm.rs @@ -10,6 +10,7 @@ use clap::Parser; 
use opte::api::API_VERSION; use opte::api::Direction; use opte::api::DomainName; +use opte::api::FlowPair; use opte::api::IpAddr; use opte::api::IpCidr; use opte::api::Ipv4Addr; @@ -32,7 +33,6 @@ use oxide_vpc::api::ClearVirt2PhysReq; use oxide_vpc::api::DelRouterEntryReq; use oxide_vpc::api::DelRouterEntryResp; use oxide_vpc::api::DhcpCfg; -use oxide_vpc::api::DumpFlowStatsResp; use oxide_vpc::api::ExternalIpCfg; use oxide_vpc::api::Filters as FirewallFilters; use oxide_vpc::api::FirewallAction; @@ -279,11 +279,40 @@ enum Command { direction: Option, }, - /// XXX TEMP + /// Return the IDs of all registered stat objects. + ListRootStats { + /// The OPTE port to query. + #[arg(short)] + port: String, + }, + + /// Return the IDs of all current flows. + ListFlowStats { + /// The OPTE port to query. + #[arg(short)] + port: String, + }, + + /// Request the current state of root stats contained in a port. + DumpRootStats { + /// The OPTE port to query. + #[arg(short)] + port: String, + // /// A comma-separated list of stat UUIDs of interest. If omitted, + // /// request all available stats. + // #[arg(long)] + // ids: Uuid, + }, + + /// Return the IDs of all current flows. DumpFlowStats { - /// The OPTE port to read... + /// The OPTE port to query. #[arg(short)] port: String, + // /// A comma-separated list of flowkeys of interest. If omitted, + // /// request all available stats. 
+ // #[arg(long)] + // ids: Vec, }, } @@ -870,10 +899,35 @@ fn main() -> anyhow::Result<()> { } } - // XXX TEMP + Command::ListRootStats { port } => { + let vals = hdl.list_root_stats(&port)?; + + for val in vals.root_ids { + println!("{val}"); + } + } + + Command::ListFlowStats { port } => { + let vals = hdl.list_flow_stats(&port)?; + + println!("Inbound -> Outbound"); + for FlowPair { inbound, outbound } in vals.flow_ids { + println!("{inbound} -> {outbound}"); + } + } + + Command::DumpRootStats { port } => { + let vals = hdl.dump_root_stats(&port, [])?; + for (id, stat) in vals.root_stats { + println!("{id}:\n\t{stat:?}"); + } + } + Command::DumpFlowStats { port } => { - let DumpFlowStatsResp { data } = hdl.dump_flowstats(&port)?; - println!("{data}"); + let vals = hdl.dump_flow_stats(&port, [])?; + for (id, stat) in vals.flow_stats { + println!("{id}:\n\t{stat:?}"); + } } } diff --git a/crates/opte-api/src/cmd.rs b/crates/opte-api/src/cmd.rs index ae20bde5..b01c5e44 100644 --- a/crates/opte-api/src/cmd.rs +++ b/crates/opte-api/src/cmd.rs @@ -5,11 +5,15 @@ // Copyright 2025 Oxide Computer Company use super::API_VERSION; +use super::FlowStat; +use super::FullCounter; use super::RuleId; use super::TcpState; use super::encap::Vni; use super::ip::IpCidr; use super::mac::MacAddr; +use alloc::collections::BTreeMap; +use alloc::collections::BTreeSet; use alloc::string::String; use alloc::string::ToString; use alloc::vec::Vec; @@ -18,6 +22,7 @@ use illumos_sys_hdrs::c_int; use illumos_sys_hdrs::size_t; use serde::Deserialize; use serde::Serialize; +use uuid::Uuid; pub const XDE_IOC: u32 = 0xde777700; pub const XDE_IOC_OPTE_CMD: i32 = XDE_IOC as i32 | 0x01; @@ -50,9 +55,10 @@ pub enum OpteCmd { SetExternalIps = 80, // set xde external IPs for a port AllowCidr = 90, // allow ip block through gateway tx/rx RemoveCidr = 91, // deny ip block through gateway tx/rx - - // TEMP - DumpFlowStats = 34, + ListRootStat = 100, // list the ids of all registered root stats + 
ListFlowStat = 101, // list the flow-keys of all current flows + DumpRootStat = 102, // request current counter set(s) with a given ID + DumpFlowStat = 103, // request flow stats for one or more flows } impl TryFrom for OpteCmd { @@ -85,6 +91,10 @@ impl TryFrom for OpteCmd { 80 => Ok(Self::SetExternalIps), 90 => Ok(Self::AllowCidr), 91 => Ok(Self::RemoveCidr), + 100 => Ok(Self::ListRootStat), + 101 => Ok(Self::ListFlowStat), + 102 => Ok(Self::DumpRootStat), + 103 => Ok(Self::DumpFlowStat), _ => Err(()), } } @@ -264,6 +274,13 @@ pub struct NoResp { impl CmdOk for NoResp {} +/// Arbitrary request directed at a port which requires no additional +/// selectors. +#[derive(Debug, Deserialize, Serialize)] +pub struct PortReq { + pub port_name: String, +} + /// Dump various information about a layer, for use in debugging or /// administrative purposes. #[derive(Debug, Deserialize, Serialize)] @@ -299,10 +316,7 @@ pub struct DumpLayerResp { impl CmdOk for DumpLayerResp {} -#[derive(Debug, Deserialize, Serialize)] -pub struct ListLayersReq { - pub port_name: String, -} +pub type ListLayersReq = PortReq; #[derive(Debug, Deserialize, Serialize)] pub struct LayerDesc { @@ -327,10 +341,7 @@ pub struct ListLayersResp { impl CmdOk for ListLayersResp {} -#[derive(Debug, Deserialize, Serialize)] -pub struct ClearUftReq { - pub port_name: String, -} +pub type ClearUftReq = PortReq; #[derive(Debug, Deserialize, Serialize)] pub struct ClearLftReq { @@ -338,10 +349,7 @@ pub struct ClearLftReq { pub layer_name: String, } -#[derive(Debug, Deserialize, Serialize)] -pub struct DumpUftReq { - pub port_name: String, -} +pub type DumpUftReq = PortReq; #[derive(Debug, Deserialize, Serialize)] pub struct DumpUftResp { @@ -361,10 +369,7 @@ pub struct UftEntryDump { pub summary: String, } -#[derive(Debug, Deserialize, Serialize)] -pub struct DumpTcpFlowsReq { - pub port_name: String, -} +pub type DumpTcpFlowsReq = PortReq; #[derive(Debug, Deserialize, Serialize)] pub struct DumpTcpFlowsResp { @@ 
-409,3 +414,69 @@ pub struct RuleDump { pub data_predicates: Vec, pub action: String, } + +pub type ListRootStatReq = PortReq; + +#[derive(Debug, Deserialize, Serialize)] +pub struct ListRootStatResp { + pub root_ids: Vec, +} + +impl CmdOk for ListRootStatResp {} + +pub type ListFlowStatReq = PortReq; + +#[derive(Debug, Deserialize, Serialize, Ord, Eq, PartialEq, PartialOrd)] +pub struct FlowPair { + pub inbound: Flow, + pub outbound: Flow, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct ListFlowStatResp { + pub flow_ids: BTreeSet>, +} + +impl CmdOk for ListFlowStatResp {} + +/// Request the current state of some (or all) root stats contained +/// in a port. +#[derive(Debug, Deserialize, Serialize)] +pub struct DumpRootStatReq { + /// The name of the target port. + pub port_name: String, + /// The set of root stat IDs to query. + /// + /// If empty, collect the state of all stats. + pub root_ids: BTreeSet, +} + +/// The current state of queried root stats. +#[derive(Debug, Deserialize, Serialize)] +pub struct DumpRootStatResp { + /// The set of queried root stats. + pub root_stats: BTreeMap, +} + +impl CmdOk for DumpRootStatResp {} + +/// Request the current state of some (or all) flow stats contained +/// in a port. +#[derive(Debug, Deserialize, Serialize)] +pub struct DumpFlowStatReq { + /// The name of the target port. + pub port_name: String, + /// The set of flow-keys to query. + /// + /// If empty, collect the state of all flows. + pub flow_ids: BTreeSet, +} + +/// The current state of queried flow stats. +#[derive(Debug, Deserialize, Serialize)] +pub struct DumpFlowStatResp { + /// The set of queried flow stats. 
+ pub flow_stats: BTreeMap>, +} + +impl CmdOk for DumpFlowStatResp {} diff --git a/lib/opte-ioctl/Cargo.toml b/lib/opte-ioctl/Cargo.toml index 3e410cc1..4c2f734d 100644 --- a/lib/opte-ioctl/Cargo.toml +++ b/lib/opte-ioctl/Cargo.toml @@ -15,3 +15,4 @@ postcard.workspace = true serde.workspace = true thiserror.workspace = true libnet.workspace = true +uuid.workspace = true diff --git a/lib/opte-ioctl/src/lib.rs b/lib/opte-ioctl/src/lib.rs index 863cb9e7..34235048 100644 --- a/lib/opte-ioctl/src/lib.rs +++ b/lib/opte-ioctl/src/lib.rs @@ -9,15 +9,23 @@ use opte::api::ClearLftReq; use opte::api::ClearUftReq; use opte::api::CmdOk; use opte::api::Direction; +use opte::api::DumpFlowStatReq; +use opte::api::DumpFlowStatResp; use opte::api::DumpLayerReq; use opte::api::DumpLayerResp; +use opte::api::DumpRootStatReq; +use opte::api::DumpRootStatResp; use opte::api::DumpTcpFlowsReq; use opte::api::DumpTcpFlowsResp; use opte::api::DumpUftReq; use opte::api::DumpUftResp; pub use opte::api::InnerFlowId; +use opte::api::ListFlowStatReq; +use opte::api::ListFlowStatResp; use opte::api::ListLayersReq; use opte::api::ListLayersResp; +use opte::api::ListRootStatReq; +use opte::api::ListRootStatResp; use opte::api::NoResp; use opte::api::OpteCmd; use opte::api::OpteCmdIoctl; @@ -34,7 +42,6 @@ use oxide_vpc::api::DelRouterEntryReq; use oxide_vpc::api::DelRouterEntryResp; use oxide_vpc::api::DeleteXdeReq; use oxide_vpc::api::DhcpCfg; -use oxide_vpc::api::DumpFlowStatsResp; use oxide_vpc::api::DumpVirt2BoundaryResp; use oxide_vpc::api::DumpVirt2PhysResp; use oxide_vpc::api::IpCidr; @@ -53,6 +60,7 @@ use std::fs::File; use std::fs::OpenOptions; use std::os::unix::io::AsRawFd; use thiserror::Error; +use uuid::Uuid; /// Errors related to administering the OPTE driver. #[derive(Debug, Error)] @@ -370,16 +378,65 @@ impl OpteHdl { ) } - /// TEMP METHOD - pub fn dump_flowstats( + /// Return the IDs of all registered stat objects. 
+ pub fn list_root_stats( &self, port_name: &str, - ) -> Result { - let cmd = OpteCmd::DumpFlowStats; + ) -> Result { run_cmd_ioctl( self.device.as_raw_fd(), - cmd, - Some(&DumpUftReq { port_name: port_name.to_string() }), + OpteCmd::ListRootStat, + Some(&ListRootStatReq { port_name: port_name.to_string() }), + ) + } + + /// Return the IDs of all current flows. + pub fn list_flow_stats( + &self, + port_name: &str, + ) -> Result, Error> { + run_cmd_ioctl( + self.device.as_raw_fd(), + OpteCmd::ListFlowStat, + Some(&ListFlowStatReq { port_name: port_name.to_string() }), + ) + } + + /// Request the current state of some (or all) root stats contained + /// in a port. + /// + /// An empty `stat_ids` will request all present stats. + pub fn dump_root_stats( + &self, + port_name: &str, + stat_ids: impl IntoIterator, + ) -> Result { + run_cmd_ioctl( + self.device.as_raw_fd(), + OpteCmd::DumpRootStat, + Some(&DumpRootStatReq { + port_name: port_name.to_string(), + root_ids: stat_ids.into_iter().collect(), + }), + ) + } + + /// Request the current state of some (or all) flow stats contained + /// in a port. + /// + /// An empty `flow_keys` will request all present flows. 
+ pub fn dump_flow_stats( + &self, + port_name: &str, + flow_keys: impl IntoIterator, + ) -> Result, Error> { + run_cmd_ioctl( + self.device.as_raw_fd(), + OpteCmd::DumpFlowStat, + Some(&DumpFlowStatReq { + port_name: port_name.to_string(), + flow_ids: flow_keys.into_iter().collect(), + }), ) } } diff --git a/lib/opte/src/engine/port/mod.rs b/lib/opte/src/engine/port/mod.rs index e524a536..c03bd209 100644 --- a/lib/opte/src/engine/port/mod.rs +++ b/lib/opte/src/engine/port/mod.rs @@ -1007,7 +1007,7 @@ impl Port { Ok(DumpTcpFlowsResp { flows: data.tcp_flows.dump() }) } - /// XXX TEST METHOD + #[cfg(any(test, feature = "std"))] pub fn dump_flow_stats(&self) -> Result { let data = self.data.read(); check_state!( @@ -1747,6 +1747,12 @@ impl Port { .get(flow) .map(|entry| entry.state().tcp_state()) } + + /// Provides read access to all port stats. + pub fn read_stats(&self, f: impl FnOnce(&StatTree) -> T) -> T { + let data = self.data.read(); + f(&data.flow_stats) + } } #[allow(dead_code)] diff --git a/lib/opte/src/engine/stat.rs b/lib/opte/src/engine/stat.rs index 518efd10..b1f972f9 100644 --- a/lib/opte/src/engine/stat.rs +++ b/lib/opte/src/engine/stat.rs @@ -14,6 +14,7 @@ use alloc::boxed::Box; use alloc::collections::BTreeMap; use alloc::collections::BTreeSet; use alloc::collections::btree_map::Entry; +#[cfg(any(test, feature = "std"))] use alloc::string::String; use alloc::sync::Arc; use alloc::sync::Weak; @@ -21,6 +22,7 @@ use alloc::vec::Vec; use core::sync::atomic::AtomicU64; use core::sync::atomic::Ordering; use opte_api::Direction; +use opte_api::FlowPair; use opte_api::FlowStat as ApiFlowStat; use opte_api::FullCounter as ApiFullCounter; use opte_api::PacketCounter as ApiPktCounter; @@ -739,6 +741,11 @@ impl StatTree { }); } + /// Return the IDs of all present roots. + pub fn all_root_ids(&self) -> impl Iterator { + self.roots.keys().copied() + } + /// Return a snapshot of collated stats for a given root. 
/// /// This will include the values of all downstream children, @@ -753,11 +760,36 @@ impl StatTree { /// but may be susceptible to partial reads between individual counters. pub fn all_root_stats( &self, - ) -> impl Iterator { - self.roots.iter().map(|(k, v)| (k, v.combined_stats())) + ) -> impl Iterator { + self.roots.iter().map(|(k, v)| (*k, v.combined_stats())) + } + + /// Return the IDs of all present flows. + pub fn all_flow_pairs( + &self, + ) -> impl Iterator> { + self.flows.iter().map(|(k, v)| match v.dir { + Direction::In => FlowPair { inbound: *k, outbound: v.partner }, + Direction::Out => FlowPair { outbound: *k, inbound: v.partner }, + }) + } + + /// Return a snapshot of stats for a given flow. + pub fn flow_stat( + &self, + id: &InnerFlowId, + ) -> Option> { + self.flows.get(id).map(|v| ApiFlowStat::from(v.as_ref())) + } + + /// Return a snapshot of collated stats for all present flows. + pub fn all_flow_stats( + &self, + ) -> impl Iterator)> { + self.flows.iter().map(|(k, v)| (*k, ApiFlowStat::from(v.as_ref()))) } - // TEMP + #[cfg(any(test, feature = "std"))] pub fn dump(&self) -> String { let mut out = String::new(); out.push_str("--Roots--\n"); diff --git a/lib/oxide-vpc/src/api/mod.rs b/lib/oxide-vpc/src/api/mod.rs index f4aac8f0..d131c244 100644 --- a/lib/oxide-vpc/src/api/mod.rs +++ b/lib/oxide-vpc/src/api/mod.rs @@ -649,14 +649,6 @@ pub struct FirewallRule { pub stat_id: Option, } -// TEMP -#[derive(Debug, Deserialize, Serialize)] -pub struct DumpFlowStatsResp { - pub data: String, -} - -impl CmdOk for DumpFlowStatsResp {} - impl FromStr for FirewallRule { type Err = String; diff --git a/xde/src/xde.rs b/xde/src/xde.rs index a8f26f46..d68353c3 100644 --- a/xde/src/xde.rs +++ b/xde/src/xde.rs @@ -872,28 +872,25 @@ unsafe extern "C" fn xde_ioc_opte_cmd(karg: *mut c_void, mode: c_int) -> c_int { hdlr_resp(&mut env, resp) } - // TEMP - OpteCmd::DumpFlowStats => { - let resp = flow_stats_hdlr(&mut env); + OpteCmd::ListRootStat => { + let resp = 
list_root_stats_hdlr(&mut env); hdlr_resp(&mut env, resp) } - } -} -// TODO: this is just sufficient for a demo. Develop the actual interface. -#[unsafe(no_mangle)] -fn flow_stats_hdlr( - env: &mut IoctlEnvelope, -) -> Result { - let req: oxide_vpc::api::DumpUftReq = env.copy_in_req()?; - let state = get_xde_state(); - let devs = state.devs.read(); - match devs.get_by_name(&req.port_name) { - Some(dev) => dev - .port - .dump_flow_stats() - .map(|data| oxide_vpc::api::DumpFlowStatsResp { data }), - None => Err(OpteError::PortNotFound(req.port_name)), + OpteCmd::ListFlowStat => { + let resp = list_flow_stats_hdlr(&mut env); + hdlr_resp(&mut env, resp) + } + + OpteCmd::DumpRootStat => { + let resp = dump_root_stats_hdlr(&mut env); + hdlr_resp(&mut env, resp) + } + + OpteCmd::DumpFlowStat => { + let resp = dump_flow_stats_hdlr(&mut env); + hdlr_resp(&mut env, resp) + } } } @@ -2806,6 +2803,88 @@ fn remove_cidr_hdlr( gateway::remove_cidr(&dev.port, req.cidr, req.dir, state.vpc_map.clone()) } +#[unsafe(no_mangle)] +fn list_root_stats_hdlr( + env: &mut IoctlEnvelope, +) -> Result { + let req: opte::api::ListRootStatReq = env.copy_in_req()?; + let state = get_xde_state(); + let devs = state.devs.read(); + let dev = devs + .get_by_name(&req.port_name) + .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; + + Ok(opte::api::ListRootStatResp { + root_ids: dev.port.read_stats(|stats| stats.all_root_ids().collect()), + }) +} + +#[unsafe(no_mangle)] +fn list_flow_stats_hdlr( + env: &mut IoctlEnvelope, +) -> Result, OpteError> { + let req: opte::api::ListFlowStatReq = env.copy_in_req()?; + let state = get_xde_state(); + let devs = state.devs.read(); + let dev = devs + .get_by_name(&req.port_name) + .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; + + Ok(opte::api::ListFlowStatResp { + flow_ids: dev.port.read_stats(|stats| stats.all_flow_pairs().collect()), + }) +} + +#[unsafe(no_mangle)] +fn dump_root_stats_hdlr( + env: &mut IoctlEnvelope, +) -> Result 
{ + let req: opte::api::DumpRootStatReq = env.copy_in_req()?; + let state = get_xde_state(); + let devs = state.devs.read(); + let dev = devs + .get_by_name(&req.port_name) + .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; + + let root_stats = dev.port.read_stats(|stats| { + if req.root_ids.is_empty() { + stats.all_root_stats().collect() + } else { + req.root_ids + .iter() + .filter_map(|k| stats.root_stat(k).map(|v| (*k, v))) + .collect() + } + }); + + Ok(opte::api::DumpRootStatResp { root_stats }) +} + +#[unsafe(no_mangle)] +fn dump_flow_stats_hdlr( + env: &mut IoctlEnvelope, +) -> Result, OpteError> { + let req: opte::api::DumpFlowStatReq = env.copy_in_req()?; + let state = get_xde_state(); + let devs = state.devs.read(); + let dev = devs + .get_by_name(&req.port_name) + .ok_or_else(|| OpteError::PortNotFound(req.port_name.clone()))?; + + let flow_stats = dev.port.read_stats(|stats| { + if req.flow_ids.is_empty() { + stats.all_flow_stats().collect() + } else { + req.flow_ids + .iter() + .filter_map(|k| stats.flow_stat(k).map(|v| (*k, v))) + .collect() + } + }); + + Ok(opte::api::DumpFlowStatResp { flow_stats }) +} + #[unsafe(no_mangle)] fn list_ports_hdlr() -> Result { let mut resp = ListPortsResp { ports: vec![] }; From 40f5198036761b58b4976a903625adae43144249 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 10 Jul 2025 15:39:01 +0100 Subject: [PATCH 36/37] TCP flow state is now a *bit* more precise. 
--- lib/opte/src/print.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/opte/src/print.rs b/lib/opte/src/print.rs index a9289d96..e28ac1e3 100644 --- a/lib/opte/src/print.rs +++ b/lib/opte/src/print.rs @@ -270,7 +270,7 @@ pub fn print_tcp_flows_into( ) -> std::io::Result<()> { let mut t = TabWriter::new(writer); - writeln!(t, "FLOW\tSTATE\tHITS\tSEGS IN\tSEGS OUT\tBYTES IN\tBYTES OUT")?; + writeln!(t, "FLOW\tSTATE\tHITS")?; for (flow_id, entry) in &flows.flows { print_tcp_flow(&mut t, flow_id, entry)?; } From 355fc09545445beda7cd789033507f13e80cbbe7 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 10 Jul 2025 17:31:35 +0100 Subject: [PATCH 37/37] Omicron would like these to impl Eq. --- lib/oxide-vpc/src/api/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/oxide-vpc/src/api/mod.rs b/lib/oxide-vpc/src/api/mod.rs index d131c244..b866ffa1 100644 --- a/lib/oxide-vpc/src/api/mod.rs +++ b/lib/oxide-vpc/src/api/mod.rs @@ -368,7 +368,7 @@ impl From for GuestPhysAddr { /// abstraction, it's simply allowing one subnet to talk to another. /// There is no separate VPC router process, the real routing is done /// by the underlay. -#[derive(Clone, Debug, Copy, Deserialize, Serialize)] +#[derive(Clone, Debug, Copy, Deserialize, Serialize, Eq, PartialEq)] pub enum RouterTarget { Drop, InternetGateway(Option), @@ -430,7 +430,7 @@ impl Display for RouterTarget { } /// The class of router which a rule belongs to. -#[derive(Clone, Debug, Copy, Deserialize, Serialize)] +#[derive(Clone, Debug, Copy, Deserialize, Serialize, Eq, PartialEq)] pub enum RouterClass { /// The rule belongs to the shared VPC-wide router. System, @@ -581,7 +581,7 @@ pub struct ClearVirt2BoundaryReq { pub tep: Vec, } -#[derive(Copy, Clone, Debug, Deserialize, Serialize)] +#[derive(Copy, Clone, Debug, Deserialize, Serialize, Eq, PartialEq)] pub struct Route { pub dest: IpCidr, pub target: RouterTarget,