Good morning, and before of all thank you for taking your time to read this.
I am implementing a simple form of INT reporting from the dataplane to the control plane, using the BMv2 software switch. The current P4 program is this one:
// SPDX-License-Identifier: Apache-2.0
/* -*- P4_16 -*- */
#include <core.p4>
#include <v1model.p4>
#define MAX_REG_SIZE 4096
#define MAX_PORTS 24
const bit<16> TYPE_IPV4 = 0x800;
const bit<16> TYPE_INT = 0x808;
const bit<8> TYPE_ICMP = 0x001;
const bit<9> CPU_PORT = 142;
const bit<32> CPU_MIRROR = 100;
/*************************************************************************
*********************** H E A D E R S ***********************************
*************************************************************************/
typedef bit<9> egressSpec_t;
typedef bit<48> macAddr_t;
typedef bit<32> ip4Addr_t;
typedef bit<48> time_t;
header ethernet_t {
macAddr_t dstAddr;
macAddr_t srcAddr;
bit<16> etherType;
}
@controller_header("packet_in")
header int_t {
bit<16> protocol;
bit<48> ingress_global_timestamp;
bit<48> egress_global_timestamp;
bit<32> enq_timestamp;
bit<19> enq_qdepth;
bit<32> deq_timedelta;
bit<19> deq_qdepth;
bit<2> payload;
}
header ipv4_t {
bit<4> version;
bit<4> ihl;
bit<8> diffserv;
bit<16> totalLen;
bit<16> identification;
bit<3> flags;
bit<13> fragOffset;
bit<8> ttl;
bit<8> protocol;
bit<16> hdrChecksum;
ip4Addr_t srcAddr;
ip4Addr_t dstAddr;
}
struct metadata {
/* empty */
}
struct headers {
ethernet_t ethernet;
int_t telemetry;
ipv4_t ipv4;
}
/*************************************************************************
*********************** P A R S E R ***********************************
*************************************************************************/
parser MyParser(packet_in packet,
out headers hdr,
inout metadata meta,
inout standard_metadata_t standard_metadata) {
state start {
packet.extract(hdr.ethernet);
transition select (hdr.ethernet.etherType) {
TYPE_IPV4: parse_ipv4;
TYPE_INT: parse_int;
default: accept;
}
}
state parse_ipv4 {
packet.extract(hdr.ipv4);
transition select (hdr.ipv4.protocol) {
default: accept;
}
}
state parse_int {
packet.extract(hdr.telemetry);
transition select (hdr.telemetry.protocol) {
default: accept;
}
}
}
/*************************************************************************
************ C H E C K S U M V E R I F I C A T I O N *************
*************************************************************************/
control MyVerifyChecksum(inout headers hdr, inout metadata meta) {
apply { }
}
/*************************************************************************
************** I N G R E S S P R O C E S S I N G *******************
*************************************************************************/
control MyIngress(inout headers hdr,
inout metadata meta,
inout standard_metadata_t standard_metadata) {
register<bit<32>>(MAX_REG_SIZE) byte_cnt_reg;
register<time_t>(MAX_REG_SIZE) last_time_reg;
action drop() {
mark_to_drop(standard_metadata);
}
action ipv4_forward(macAddr_t macAddr, egressSpec_t port) {
hdr.ethernet.dstAddr = macAddr;
standard_metadata.egress_spec = port;
hdr.ipv4.ttl = hdr.ipv4.ttl - 1;
standard_metadata.priority = 0;
}
table ipv4_lpm {
key = {
hdr.ipv4.dstAddr: lpm;
}
actions = {
ipv4_forward;
drop;
NoAction;
}
size = 1024;
default_action = drop;
}
action source_drop(bit<7> probability) {
bit<7> rand;
random(rand, 0, 100);
if (rand <= probability) {
drop();
}
}
action limit_bandwidth(bit<32> bandwidth) {
bit<32> byte_cnt;
bit<32> hashed;
time_t last_time;
hash(hashed, HashAlgorithm.crc32, (bit<32>) 0,
{ hdr.ipv4.srcAddr }, (bit<32>) MAX_REG_SIZE);
byte_cnt_reg.read(byte_cnt, hashed);
last_time_reg.read(last_time, hashed);
time_t cur_time = standard_metadata.ingress_global_timestamp;
if ((byte_cnt >> 10) >= bandwidth) {
drop();
} else if ((byte_cnt >> 10) >= (bandwidth >> 1)) {
standard_metadata.priority = 3;
}
if (cur_time - last_time >= 1000000) {
byte_cnt = 0;
last_time_reg.write(hashed, cur_time);
}
byte_cnt = byte_cnt + standard_metadata.packet_length;
byte_cnt_reg.write(hashed, byte_cnt);
}
table source_filter_exact {
key = {
hdr.ipv4.srcAddr: exact;
}
actions = {
source_drop;
limit_bandwidth;
NoAction;
}
size = 1024;
default_action = NoAction();
}
apply {
if (hdr.ipv4.isValid() && standard_metadata.instance_type == 0) {
ipv4_lpm.apply();
source_filter_exact.apply();
bit<7> rand;
random(rand, 0, 100);
if (rand <= 10)
clone(CloneType.I2E, CPU_MIRROR);
}
}
}
/*************************************************************************
**************** E G R E S S P R O C E S S I N G *******************
*************************************************************************/
control MyEgress(inout headers hdr,
inout metadata meta,
inout standard_metadata_t standard_metadata) {
action drop() {
mark_to_drop(standard_metadata);
}
action rewrite_smac(macAddr_t macAddr) {
hdr.ethernet.srcAddr = macAddr;
}
table smac_exact {
key = {
standard_metadata.egress_port: exact;
}
actions = {
rewrite_smac;
NoAction;
}
size = 1024;
default_action = NoAction();
}
apply {
smac_exact.apply();
if (standard_metadata.mcast_grp != 0 && standard_metadata.egress_port == standard_metadata.ingress_port) {
drop();
}
if (standard_metadata.instance_type != 0) {
hdr.ethernet.setInvalid();
hdr.ipv4.setInvalid();
hdr.telemetry.setValid();
hdr.telemetry.protocol = TYPE_IPV4;
hdr.telemetry.ingress_global_timestamp = standard_metadata.ingress_global_timestamp;
hdr.telemetry.egress_global_timestamp = standard_metadata.egress_global_timestamp;
hdr.telemetry.enq_timestamp = standard_metadata.enq_timestamp;
hdr.telemetry.enq_qdepth = standard_metadata.enq_qdepth;
hdr.telemetry.deq_timedelta = standard_metadata.deq_timedelta;
hdr.telemetry.deq_qdepth = standard_metadata.deq_qdepth;
hdr.telemetry.payload = (bit<2>) 0;
}
}
}
/*************************************************************************
************* C H E C K S U M C O M P U T A T I O N **************
*************************************************************************/
control MyComputeChecksum(inout headers hdr, inout metadata meta) {
apply {
update_checksum(
hdr.ipv4.isValid(),
{ hdr.ipv4.version,
hdr.ipv4.ihl,
hdr.ipv4.diffserv,
hdr.ipv4.totalLen,
hdr.ipv4.identification,
hdr.ipv4.flags,
hdr.ipv4.fragOffset,
hdr.ipv4.ttl,
hdr.ipv4.protocol,
hdr.ipv4.srcAddr,
hdr.ipv4.dstAddr },
hdr.ipv4.hdrChecksum,
HashAlgorithm.csum16);
}
}
/*************************************************************************
*********************** D E P A R S E R *******************************
*************************************************************************/
control MyDeparser(packet_out packet, in headers hdr) {
apply {
packet.emit(hdr.ethernet);
packet.emit(hdr.ipv4);
//packet.emit(hdr.telemetry); // Should be this instruction uncommented?
}
}
/*************************************************************************
*********************** S W I T C H *******************************
*************************************************************************/
V1Switch(
MyParser(),
MyVerifyChecksum(),
MyIngress(),
MyEgress(),
MyComputeChecksum(),
MyDeparser()
) main;
My current control plane implementation (written using p4runtime_sh Python wrapper: I haven’t found any implementation of PRE & Clone instructions in runtime_CLI (I need to use a P4Runtime-compatible control plane software, so simple_switch_CLI is out of order to me), is this the case? Are there any better alternatives, using directly the p4runtime_lib without the shell wrapper?) is this one:
import os
import argparse
import p4runtime_sh.shell as sh
def main(p4info, bmv2_json, server_addr, install):
if install:
sh.setup(device_id=0, grpc_addr=server_addr, election_id=(0,1), config=sh.FwdPipeConfig(p4info, bmv2_json))
else:
sh.setup(device_id=0, grpc_addr=server_addr, election_id=(0,1))
# -----------------------------------------------------------
# ------------------------- ENTRIES -------------------------
# -----------------------------------------------------------
sh.CloneSessionEntry(100).add(142, 1).insert()
mac_addresses = {
'1' : 'aa:ae:f3:59:16:88',
'2' : 'da:81:d8:cc:55:2a',
'3' : 'ba:f3:70:fa:d5:8f'
}
for port, mac in mac_addresses.items():
te = sh.TableEntry('MyEgress.smac_exact')(action='MyEgress.rewrite_smac')
te.match['standard_metadata.egress_port'] = port
te.action['macAddr'] = mac
te.insert()
dst_mac_addresses = {
'10.0.1.1': '02:f0:db:29:0e:f6',
'10.0.2.1': 'ee:05:1f:41:be:aa',
'10.0.3.1': 'fe:1d:8a:77:01:04'
}
for i in range(1, 4): // Yes, I know this is poorly written
dst_ip = '10.0.' + str(i) + '.1'
te = sh.TableEntry('MyIngress.ipv4_lpm')(action='MyIngress.ipv4_forward')
te.match['hdr.ipv4.dstAddr'] = dst_ip + '/24'
te.action['macAddr'] = dst_mac_addresses[dst_ip]
te.action['port'] = str(i)
te.insert()
# -----------------------------------------------------------
# ---------------------RECEIVING INT-------------------------
header_dict = {
'1': 'Protocol',
'2': 'Ingress Timestamp',
'3': 'Egress Timestamp',
'4': 'Enqueue Timestamp',
'5': 'Enqueue Queue Depth',
'6': 'Time in queue',
'7': 'Dequeue Queue Depth',
'8': 'Payload'
}
pkt_in = sh.PacketIn()
while True:
for pkt in pkt_in.sniff(timeout=1):
print('----- PACKET------')
for md in pkt.packet.metadata:
print(header_dict.get(str(md.metadata_id)) + ': ', end='')
print(str(int.from_bytes(md.value, byteorder='big')))
sh.teardown()
# -----------------------------------------------------------
if __name__ == '__main__':
base_path = os.getcwd().split('/')[-1]
parser = argparse.ArgumentParser(description="P4 Runtime Controller written in Python to support P4 dataplane programming")
parser.add_argument('-a', '--address', type=str, default='localhost:50051', help='Address of the switch, in the format <IP>:<PORT>')
parser.add_argument('-i', '--install', action='store_true', help='If specified, installs the pipeline in the dataplane')
parser.add_argument('--p4info', help='P4Info file in text format from p4c', type=str, action='store', required=False, default='./' + base_path + '.p4info.txtpb')
parser.add_argument('--bmv2-json', help='BMv2 JSON file from p4c', type=str, action="store", required=False, default="./" + base_path + ".json")
args = parser.parse_args()
if not os.path.exists(args.p4info):
parser.print_help()
print('\nP4Info file not found: %s\nHave you compiled with p4c?' % args.p4info)
parser.exit(1)
if not os.path.exists(args.bmv2_json):
parser.print_help()
print('\nBMv2 JSON file not found :%s\nHave you compiled with p4c?' % args.bmv2_json)
parser.exit(1)
main(args.p4info, args.bmv2_json, args.address, args.install)
However, you can see I am writing always the same and known values for the “protocol” and “payload” fields in the controller header metadata for packet_in, but when reading them from the control plane they are respectively “2048” or “0” (one time it is the first, then it is the latter), and “2”: therefore, I strongly suppose even the other values are wrong (in particular, the dequeue timedelta seems to change a lot during light load in the network traffic, and the ingress/egress timestamps seem very high even considering they are in microseconds).
I do not think it is a problem in the write phase, because traffic is flowing as intended and everything is working.
Moreover, the CloneSessionEntry().add().insert() seems to have an “append” semantics rather than a “write” semantic in the entries of the dataplane: if I execute this Python script multiple times without restarting the software switch, I get N packet_in instances for incoming packet, with N equal to the amount of executions of this Python script after the last restart of the switch. Is this intended behaviour?
To conclude, I have one more side question: are there any form of pre-parsing queues for incoming packets? How does work, for BMv2 software switch, the “pre-parsing phase”: in other words, how and where are packets stored before entering the user-defined pipeline?
Thank you all for your help!
EDIT: I am sorry, I forgot to mention the launch parameters of simple_switch_grpc:
sudo simple_switch_grpc --no-p4 -i 1@ens19 -i 2@ens20 -i 3@ens21 --log-console -- --grpc-server-addr localhost:50051 --cpu-port 142 --priority-queues 4
EDIT 2: changing the Python script code to only print the packets captured through the sniff() function of PacketIn(), the output is the following: