Why does the counter not work as expected?

I wrote a Tofino P4 program, and I can successfully compile it with open-p4studio.

#include <core.p4>
#if __TARGET_TOFINO__ == 3
#include <t3na.p4>
#elif __TARGET_TOFINO__ == 2
#include <t2na.p4>
#else
#include <tna.p4>
#endif

#include "common/headers.p4"
#include "common/util.p4"

// Headers handled by this program: a single Ethernet header.
// ethernet_h is not defined in this file (presumably from common/headers.p4).
struct headers_t {
    ethernet_h ethernet;
}

// Ingress user metadata.
struct metadata_t {
    // Written only by the do_read action (output of the `read` RegisterAction)
    // and compared against 0 in SwitchIngress.apply.
    bit<32> qlen;
}

// Ingress parser: runs the TofinoIngressParser sub-parser, then extracts the
// Ethernet header and accepts. ig_md is declared `out` but is not assigned here.
parser SwitchIngressParser(
        packet_in pkt,
        out headers_t hdr,
        out metadata_t ig_md,
        out ingress_intrinsic_metadata_t ig_intr_md) {

    // Not defined in this file; presumably provided by common/util.p4.
    TofinoIngressParser() tofino_parser;

    state start {
        tofino_parser.apply(pkt, ig_intr_md);
        transition parse_ethernet;
    }

    state parse_ethernet {
        pkt.extract(hdr.ethernet);
        transition accept;
    }

}

// Ingress deparser: emits hdr (headers_t declares only the Ethernet header).
// ig_md and ig_dprsr_md are accepted but not used.
control SwitchIngressDeparser(
        packet_out pkt,
        inout headers_t hdr,
        in metadata_t ig_md,
        in ingress_intrinsic_metadata_for_deparser_t ig_dprsr_md) {

    apply {
        pkt.emit(hdr);
    }
}

// Ingress control: applies fwd_write and then fwd_read (both keyed on the
// Ethernet destination address), then bumps `counter` index 0 or 1 depending
// on whether ig_md.qlen is greater than zero.
//
// No action in this control assigns an egress port; the `port` action
// parameter is used only as the index into the `cnt` register.
control SwitchIngress(
        inout headers_t hdr,
        inout metadata_t ig_md,
        in ingress_intrinsic_metadata_t ig_intr_md,
        in ingress_intrinsic_metadata_from_parser_t ig_prsr_md,
        inout ingress_intrinsic_metadata_for_deparser_t ig_dprsr_md,
        inout ingress_intrinsic_metadata_for_tm_t ig_tm_md) {

    // 512-entry register of 32-bit values indexed by PortId_t, initialised to 0.
    // NOTE(review): `set` and `read` below are two separate RegisterActions on
    // this same Register, and a packet that hits both tables executes both.
    // Tofino is understood to permit only one access to a given Register per
    // packet -- confirm against the TNA documentation.
    Register<bit<32>, PortId_t>(size=32w512, initial_value=0) cnt;
    // Overwrites the indexed entry with the constant 2222; returns nothing.
    RegisterAction<bit<32>, PortId_t, void>(cnt) set = {
        void apply(inout bit<32> value) {
            value = 32w2222;
        }
    };
    // Returns the indexed entry without modifying it.
    RegisterAction<bit<32>, PortId_t, bit<32>>(cnt) read = {
        void apply(inout bit<32> value, out bit<32> read_value) {
            read_value = value;
        }
    };

    // Three indirect packet-and-byte counters, each constructed with size 2:
    //   counter - index 0 when ig_md.qlen > 0, index 1 otherwise (see apply)
    //   test    - index 0 in do_write, index 1 in miss
    //   test_2  - index 0 in do_read,  index 1 in miss_2
    Counter<bit<32>, bit<2>>(2, CounterType_t.PACKETS_AND_BYTES) counter;
    Counter<bit<32>, bit<2>>(2, CounterType_t.PACKETS_AND_BYTES) test;
    Counter<bit<32>, bit<2>>(2, CounterType_t.PACKETS_AND_BYTES) test_2;

    // Counts in test[0] and writes 2222 into cnt[port].
    action do_write(PortId_t port) {
        test.count(0);
        set.execute(port);
    }
    // Counts in test[1]; touches no register.
    action miss() {
        test.count(1);
    }

    // Ternary match on the destination MAC; no default_action is declared.
    table fwd_write {
        key = {
            hdr.ethernet.dst_addr : ternary;
        }

        actions = {
            do_write;
            miss;
        }

        size = 512;
    }

    // Counts in test_2[0] and copies cnt[port] into ig_md.qlen.
    action do_read(PortId_t port) {
        test_2.count(0);
        ig_md.qlen = read.execute(port);
    }
    // Counts in test_2[1]; leaves ig_md.qlen untouched.
    action miss_2() {
        test_2.count(1);
    }

    // Same key as fwd_write; additionally lists NoAction as a possible action.
    table fwd_read {
        key = {
            hdr.ethernet.dst_addr : ternary;
        }

        actions = {
            do_read;
            miss_2;
            NoAction;
        }

        size = 512;
    }

    apply {
        // Source order: write first, then read.
        fwd_write.apply();
        fwd_read.apply();
        // ig_md.qlen is assigned only by do_read, so this branch depends on
        // fwd_read having executed that action.
        if (ig_md.qlen > 0) {
            counter.count(0);
        } else {
            counter.count(1);
        }
    }
}

// Single pipeline: the ingress parser/control/deparser defined above plus the
// EmptyEgress* blocks, which are not defined in this file (presumably from
// common/util.p4).
Pipeline(SwitchIngressParser(),
         SwitchIngress(),
         SwitchIngressDeparser(),
         EmptyEgressParser(),
         EmptyEgress(),
         EmptyEgressDeparser()) pipe;

// Top-level package instantiation using that one pipeline.
Switch(pipe) main;

And I wrote a test script with Python.

import logging
import random
import time

from ptf import config
import ptf.testutils as testutils
import ptf.packet as packet
from p4testutils.misc_utils import *
from bfruntime_client_base_tests import BfRuntimeTest
import bfrt_grpc.client as gc
import bfrt_grpc.bfruntime_pb2 as bfruntime_pb2

##### Required for Thrift #####
import pd_base_tests

##### ******************* #####

# Module-level test configuration, evaluated when the module is imported.
# Packet length is taken from the 'pkt_size' test parameter.
pkt_len = int(test_param_get('pkt_size'))
logger = get_logger()
swports = get_sw_ports()
# Device id used for every gc.Target built in this script.
dev_id = 0
# Architecture flags from the 'arch' test parameter; the assert rejects any
# value other than 'tofino' or 'tofino2'.
g_is_tofino = testutils.test_param_get('arch') == 'tofino'
g_is_tofino2 = testutils.test_param_get('arch') == 'tofino2'
assert g_is_tofino or g_is_tofino2
# Any 'target' value other than 'hw' is treated as the model.
g_is_hw = testutils.test_param_get('target') == 'hw'
g_is_model = not g_is_hw

def get_pipes(bfrt_info):
    """Return the pipe indices ``[0, ..., num_pipes - 1]``.

    ``num_pipes`` is read from the default entry of the
    'device_configuration' table, queried on device ``dev_id``.
    """
    device_target = gc.Target(device_id=dev_id)
    config_table = bfrt_info.table_get('device_configuration')
    # The response is an iterator of (data, key) pairs; only the first pair's
    # data is needed.
    entry, _ = next(config_table.default_entry_get(device_target))
    return list(range(entry.to_dict()['num_pipes']))

def _forward_table_add(table, target, dmac, dmac_mask, priority, port, func):
    """Add one entry to ``table`` on ``target``.

    The key matches 'hdr.ethernet.dst_addr' against ``dmac`` with mask
    ``dmac_mask`` and carries ``priority`` as '$MATCH_PRIORITY'. The data
    selects the action named ``func`` and passes ``port`` as its 'port'
    argument.
    """
    key_fields = [
        gc.KeyTuple('hdr.ethernet.dst_addr', dmac, dmac_mask),
        gc.KeyTuple('$MATCH_PRIORITY', priority),
    ]
    action_fields = [gc.DataTuple('port', port)]

    entry_key = table.make_key(key_fields)
    entry_data = table.make_data(action_fields, func)
    table.entry_add(target, [entry_key], [entry_data])

class DirectCounterTest(BfRuntimeTest):
    """@brief Install one fwd_write and one fwd_read entry, then send one packet.

    The test performs no reads and no assertions after sending the packet.
    """

    def setUp(self):
        # Client id 0, bound to the P4 program named "my_test".
        BfRuntimeTest.setUp(self, 0, "my_test")

    def runTest(self):
        # Port 0 is excluded from the candidate ports.
        usable_ports = [candidate for candidate in swports if candidate != 0]
        assert len(usable_ports) >= 3
        print(f"swports: {swports}")
        ig_port = usable_ports[0]
        eg_port = usable_ports[1]
        logger.info("Ingress port: %d, Egress port: %d", ig_port, eg_port)

        smac = '11:33:55:77:99:00'
        dmac = '00:11:22:33:44:55'
        dmac_mask = 'ff:ff:ff:ff:ff:ff'

        # Get bfrt_info and set it as part of the test
        bfrt_info = self.interface.bfrt_info_get("my_test")

        # Both tables are keyed on the destination MAC; the "mac" annotation
        # lets the key be given as a MAC-address string.
        write_table = bfrt_info.table_get("SwitchIngress.fwd_write")
        write_table.info.key_field_annotation_add("hdr.ethernet.dst_addr", "mac")
        read_table = bfrt_info.table_get("SwitchIngress.fwd_read")
        read_table.info.key_field_annotation_add("hdr.ethernet.dst_addr", "mac")

        target = gc.Target(device_id=dev_id)

        # One exact-mask entry per table, write table first; eg_port is passed
        # as the 'port' action argument.
        entries = (
            (write_table, "SwitchIngress.do_write"),
            (read_table, "SwitchIngress.do_read"),
        )
        for table, action_name in entries:
            _forward_table_add(table, target, dmac, dmac_mask, 0, eg_port, action_name)

        pkt = testutils.simple_tcp_packet(pktlen=pkt_len, eth_dst=dmac, eth_src=smac)
        testutils.send_packet(self, ig_port, pkt)

I added the table entries properly, and I expect that after sending a packet, counter(0) is 1. However, after testing, I found that counter(1) is 1 and all of the entries in cnt are 0. The result really confuses me. Could someone take a look at why this is happening? Thanks!

Posting complete source code for P4 program and the test you are trying, and that it is for Tofino, are great details for getting an answer to your question.

Unfortunately for you, at the moment I am a bit busy and won’t try reproducing your test results myself. Instead I will ask you for a bit more homework on your side. If you have already done it before posting, great. If not, it is something I would recommend doing whenever you have questions like this.

  1. Do you have the test case isolated down to a single packet? Or at least down to a few packets, but only one of them has behavior you are confused about?
  2. For the packet whose processing behavior you are confused about, have you looked at the simulation log to see what the lookup key and action were for tables fwd_write and fwd_read, and what the value of ig_md.qlen was when the if (ig_md.qlen > 0) comparison was made? If so, what were those actions, and what was the value of ig_md.qlen? If the answers to those questions do not fully answer your question, I’d recommend following up with a reply giving that information, and any more detailed questions you have about it.

Dear @herano1999 ,

Thank you for providing the source code of your program and the test.

The first thing I see is that your test does not seem to contain any code to retrieve the counter and check that it is correct :slight_smile: . Indeed, the test ends after sending the packet, so no wonder it succeeds.

This leads me to believe that you are retrieving the counters manually later. When doing this, please remember that counters are volatile, meaning that they can be changed by the HW without the SW being aware of that. Which is why it is critical to either read them directly from the HW or perform the synchronization of the counter table before retrieving those values.

Happy hacking,
Vladimir

Thanks for your reply @p4prof @andyfingerhut . Yes, this code is for the Tofino arch. The P4 code is split out from my original code, which is too long to show, and contains much irrelevant logic. I do sync from HW before I retrieve the counter. And I improved my Python test script to be clearer.

import logging
import random
import time

from ptf import config
import ptf.testutils as testutils
import ptf.packet as packet
from p4testutils.misc_utils import *
from bfruntime_client_base_tests import BfRuntimeTest
import bfrt_grpc.client as gc
import bfrt_grpc.bfruntime_pb2 as bfruntime_pb2

##### Required for Thrift #####
import pd_base_tests

##### ******************* #####

# Module-level test configuration, evaluated when the module is imported.
# Packet length is taken from the 'pkt_size' test parameter.
pkt_len = int(test_param_get('pkt_size'))
logger = get_logger()
swports = get_sw_ports()
# Device id used for every gc.Target built in this script.
dev_id = 0
# Architecture flags from the 'arch' test parameter; the assert rejects any
# value other than 'tofino' or 'tofino2'.
g_is_tofino = testutils.test_param_get('arch') == 'tofino'
g_is_tofino2 = testutils.test_param_get('arch') == 'tofino2'
assert g_is_tofino or g_is_tofino2
# Any 'target' value other than 'hw' is treated as the model.
g_is_hw = testutils.test_param_get('target') == 'hw'
g_is_model = not g_is_hw

def get_pipes(bfrt_info):
    """Return the pipe indices ``[0, ..., num_pipes - 1]``.

    ``num_pipes`` is read from the default entry of the
    'device_configuration' table, queried on device ``dev_id``.
    """
    device_target = gc.Target(device_id=dev_id)
    config_table = bfrt_info.table_get('device_configuration')
    # The response is an iterator of (data, key) pairs; only the first pair's
    # data is needed.
    entry, _ = next(config_table.default_entry_get(device_target))
    return list(range(entry.to_dict()['num_pipes']))

def _forward_table_add(table, target, dmac, dmac_mask, priority, port, func):
    """Add one entry to ``table`` on ``target``.

    The key matches 'hdr.ethernet.dst_addr' against ``dmac`` with mask
    ``dmac_mask`` and carries ``priority`` as '$MATCH_PRIORITY'. The data
    selects the action named ``func`` and passes ``port`` as its 'port'
    argument.
    """
    key_fields = [
        gc.KeyTuple('hdr.ethernet.dst_addr', dmac, dmac_mask),
        gc.KeyTuple('$MATCH_PRIORITY', priority),
    ]
    action_fields = [gc.DataTuple('port', port)]

    entry_key = table.make_key(key_fields)
    entry_data = table.make_data(action_fields, func)
    table.entry_add(target, [entry_key], [entry_data])

class DirectCounterTest(BfRuntimeTest):
    """@brief Install one entry per table, send one packet, then print state.

    After the packet is sent, the test prints the `cnt` register entry at the
    egress-port index and entry 1 of `counter` (fetched twice), all read with
    from_hw=True. It makes no assertions about the values it prints.
    """

    def setUp(self):
        # Client id 0, bound to the P4 program named "my_test".
        BfRuntimeTest.setUp(self, 0, "my_test")

    def runTest(self):
        # Port 0 is excluded from the candidate ports.
        usable_ports = [candidate for candidate in swports if candidate != 0]
        assert len(usable_ports) >= 3
        print(f"swports: {swports}")
        ig_port = usable_ports[0]
        eg_port = usable_ports[1]
        logger.info("Ingress port: %d, Egress port: %d", ig_port, eg_port)

        smac = '11:33:55:77:99:00'
        dmac = '00:11:22:33:44:55'
        dmac_mask = 'ff:ff:ff:ff:ff:ff'

        # Get bfrt_info and set it as part of the test
        bfrt_info = self.interface.bfrt_info_get("my_test")

        # Both match tables are keyed on the destination MAC; the "mac"
        # annotation lets the key be given as a MAC-address string.
        write_table = bfrt_info.table_get("SwitchIngress.fwd_write")
        write_table.info.key_field_annotation_add("hdr.ethernet.dst_addr", "mac")
        read_table = bfrt_info.table_get("SwitchIngress.fwd_read")
        read_table.info.key_field_annotation_add("hdr.ethernet.dst_addr", "mac")

        counter_table = bfrt_info.table_get("SwitchIngress.counter")
        cnt_table = bfrt_info.table_get("SwitchIngress.cnt")

        target = gc.Target(device_id=dev_id)

        def dump_entry(table, index_field, index):
            """Fetch one entry of ``table`` from hardware and print it as a dict."""
            response = table.entry_get(
                target,
                [table.make_key([gc.KeyTuple(index_field, index)])],
                {"from_hw": True})
            entry, _ = next(response)
            print(entry.to_dict())

        # One exact-mask entry per table, write table first; eg_port is passed
        # as the 'port' action argument.
        entries = (
            (write_table, "SwitchIngress.do_write"),
            (read_table, "SwitchIngress.do_read"),
        )
        for table, action_name in entries:
            _forward_table_add(table, target, dmac, dmac_mask, 0, eg_port, action_name)

        pkt = testutils.simple_tcp_packet(pktlen=pkt_len, eth_dst=dmac, eth_src=smac)
        testutils.send_packet(self, ig_port, pkt)

        # Register entry at the index that was given to the actions as `port`.
        dump_entry(cnt_table, '$REGISTER_INDEX', eg_port)

        # Entry 1 of the counter is fetched two times back to back; there is
        # no wait between sending the packet and these reads.
        dump_entry(counter_table, '$COUNTER_INDEX', 1)
        dump_entry(counter_table, '$COUNTER_INDEX', 1)


The output is:

[... some irrelevant logs ...]
Received my_mirror_test on GetForwarding on client 0, device 0
Binding with p4_name my_mirror_test
Binding with p4_name my_mirror_test successful!!
swports: [16, 6, 11, 64, 1, 0, 7, 13, 5, 9, 10, 15, 3, 14, 12, 8, 4, 2]
Ingress port: 16, Egress port: 6
Received my_mirror_test on GetForwarding on client 0, device 0
{'SwitchIngress.cnt.f1': [0, 0, 0, 0], 'action_name': None, 'is_default_entry': False}
{'$COUNTER_SPEC_BYTES': 0, '$COUNTER_SPEC_PKTS': 0, 'action_name': None, 'is_default_entry': False}
{'$COUNTER_SPEC_BYTES': 104, '$COUNTER_SPEC_PKTS': 1, 'action_name': None, 'is_default_entry': False}
ok

I also find something quite weird in the log produced by the ./run_tofino_model script.

[... irrelevant SwitchIngressParser log ...]
:01-14 09:21:02.147463:    :0x1:-:<0,0,->:------------ Stage 0 ------------
:01-14 09:21:02.147606:    <0,0,0> WARN MauAddrDist: Bus meter_addr[15] REUSED!!!!  (0x018000c0(LT0) | 0x008000c0(LT1) -> 0x018000c0)
:01-14 09:21:02.147767:    :0x1:-:<0,0,0>:Ingress : Table SwitchIngress.fwd_read is hit
:01-14 09:21:02.147775:        :0x1:-:<0,0,0>:Key:
:01-14 09:21:02.147782:        :0x1:-:<0,0,0>:  hdr.ethernet.dst_addr[31:0] = 0x22334455
:01-14 09:21:02.147785:        :0x1:-:<0,0,0>:  hdr.ethernet.dst_addr[47:32] = 0x11
:01-14 09:21:02.147788:        :0x1:-:<0,0,0>:Executed StatefulALU 3 with instruction 1
:01-14 09:21:02.147793:        :0x1:-:<0,0,0>:Execute Action: SwitchIngress.do_read
:01-14 09:21:02.147802:        :0x1:-:<0,0,0>:Action Results:
:01-14 09:21:02.147805:        :0x1:-:<0,0,0>:  ----- CountPrimitive -----
:01-14 09:21:02.147807:        :0x1:-:<0,0,0>:  ----- Update counter: SwitchIngress.test_2
:01-14 09:21:02.147809:        :0x1:-:<0,0,0>:           VPN : 0 Ram Line : 0 Subword(Including LSBs. Need to right shift based on stats table format) : 0
:01-14 09:21:02.147812:        :0x1:-:<0,0,0>:  ----- ExecuteStatefulAluPrimitive -----
:01-14 09:21:02.147814:        :0x1:-:<0,0,0>:  ----- BlackBox: read_0 -----
:01-14 09:21:02.147817:        :0x1:-:<0,0,0>:  ----- register: SwitchIngress.cnt -----
:01-14 09:21:02.147849:        :0x1:-:<0,0,0>:  --- SALU Condition ---
:01-14 09:21:02.147976:        :0x1:-:<0,0,0>:    Not supplied by program.
:01-14 09:21:02.147981:        :0x1:-:<0,0,0>:      SALU ConditionLo: FALSE
:01-14 09:21:02.147984:        :0x1:-:<0,0,0>:      SALU ConditionHi: FALSE
:01-14 09:21:02.147986:        :0x1:-:<0,0,0>:  --- SALU Update ---
:01-14 09:21:02.147988:        :0x1:-:<0,0,0>:    None
:01-14 09:21:02.147991:        :0x1:-:<0,0,0>:  --- SALU Output ---
:01-14 09:21:02.147993:        :0x1:-:<0,0,0>:      Output predicate not supplied by program
:01-14 09:21:02.147995:        :0x1:-:<0,0,0>:      Output PredicateResult: TRUE
:01-14 09:21:02.147998:        :0x1:-:<0,0,0>:      Output Destination Field: ig_md.qlen = 0x0
:01-14 09:21:02.148000:        :0x1:-:<0,0,0>:  ---  SALU Register ---
:01-14 09:21:02.148002:        :0x1:-:<0,0,0>:     Register Index: 0x40006
:01-14 09:21:02.148004:        :0x1:-:<0,0,0>:       Before stateful alu execution: 0x00000000
:01-14 09:21:02.148006:        :0x1:-:<0,0,0>:       After stateful alu execution: 0x00000000
:01-14 09:21:02.148010:        :0x1:-:<0,0,0>:Next Table = SwitchIngress.fwd_write
:01-14 09:21:02.148017:    :0x1:-:<0,0,0>:Ingress : Table SwitchIngress.fwd_write is hit
:01-14 09:21:02.148019:        :0x1:-:<0,0,0>:Key:
:01-14 09:21:02.148024:        :0x1:-:<0,0,0>:  hdr.ethernet.dst_addr[31:0] = 0x22334455
:01-14 09:21:02.148026:        :0x1:-:<0,0,0>:  hdr.ethernet.dst_addr[47:32] = 0x11
:01-14 09:21:02.148029:        :0x1:-:<0,0,0>:Executed StatefulALU 3 with instruction 0
:01-14 09:21:02.148033:        :0x1:-:<0,0,0>:Execute Action: SwitchIngress.do_write
:01-14 09:21:02.148039:        :0x1:-:<0,0,0>:Action Results:
:01-14 09:21:02.148041:        :0x1:-:<0,0,0>:  ----- CountPrimitive -----
:01-14 09:21:02.148043:        :0x1:-:<0,0,0>:  ----- Update counter: SwitchIngress.test
:01-14 09:21:02.148046:        :0x1:-:<0,0,0>:           VPN : 0 Ram Line : 0 Subword(Including LSBs. Need to right shift based on stats table format) : 0
:01-14 09:21:02.148048:        :0x1:-:<0,0,0>:  ----- ExecuteStatefulAluPrimitive -----
:01-14 09:21:02.148055:        :0x1:-:<0,0,0>:  ----- BlackBox: set_0 -----
:01-14 09:21:02.148057:        :0x1:-:<0,0,0>:  ----- register: SwitchIngress.cnt -----
:01-14 09:21:02.148060:        :0x1:-:<0,0,0>:Next Table = tbl_my_mirror_test123
:01-14 09:21:02.148065:    :0x1:-:<0,0,->:------------ Stage 1 ------------
:01-14 09:21:02.148145:    :0x1:-:<0,0,1>:Ingress : Gateway table condition ((ig_md.qlen > 0)) matched.
:01-14 09:21:02.148318:    :0x1:-:<0,0,1>:Ingress : Gateway attached to tbl_my_mirror_test123
:01-14 09:21:02.148324:    :0x1:-:<0,0,1>:Ingress : Associated table tbl_my_mirror_test123 is executed
:01-14 09:21:02.148327:    :0x1:-:<0,0,1>:Ingress : Gateway did provide payload.
:01-14 09:21:02.148330:        :0x1:-:<0,0,1>:Next Table = tbl_my_mirror_test125
:01-14 09:21:02.148334:    :0x1:-:<0,0,1>:Ingress : Table tbl_my_mirror_test125 is miss
:01-14 09:21:02.148336:        :0x1:-:<0,0,1>:Key:
:01-14 09:21:02.148346:        :0x1:-:<0,0,1>:Execute Default Action: my_mirror_test125
:01-14 09:21:02.148350:        :0x1:-:<0,0,1>:Action Results:
:01-14 09:21:02.148352:        :0x1:-:<0,0,1>:  ----- CountPrimitive -----
:01-14 09:21:02.148355:        :0x1:-:<0,0,1>:Next Table = --END_OF_PIPELINE--
:01-14 09:21:02.148359:    :0x1:-:<0,0,->:------------ Stage 2 ------------
:01-14 09:21:02.148432:    :0x1:-:<0,0,->:------------ Stage 3 ------------
:01-14 09:21:02.148502:    :0x1:-:<0,0,->:------------ Stage 4 ------------
:01-14 09:21:02.148781:    :0x1:-:<0,0,->:------------ Stage 5 ------------
:01-14 09:21:02.148893:    :0x1:-:<0,0,->:------------ Stage 6 ------------
:01-14 09:21:02.148965:    :0x1:-:<0,0,->:------------ Stage 7 ------------
:01-14 09:21:02.149096:    :0x1:-:<0,0,->:------------ Stage 8 ------------
:01-14 09:21:02.149243:    :0x1:-:<0,0,->:------------ Stage 9 ------------
:01-14 09:21:02.149429:    :0x1:-:<0,0,->:------------ Stage 10 ------------
:01-14 09:21:02.149576:    :0x1:-:<0,0,->:------------ Stage 11 ------------
:01-14 09:21:02.149661:    :0x1:-:<0,0,->:------------ Stage 0 EOP processing ------------
:01-14 09:21:02.149913:    :0x1:-:<0,0,->:------------ Stage 1 EOP processing ------------
:01-14 09:21:02.149949:    :0x1:-:<0,0,->:------------ Stage 2 EOP processing ------------
:01-14 09:21:02.149968:    :0x1:-:<0,0,->:------------ Stage 3 EOP processing ------------
:01-14 09:21:02.149983:    :0x1:-:<0,0,->:------------ Stage 4 EOP processing ------------
:01-14 09:21:02.149997:    :0x1:-:<0,0,->:------------ Stage 5 EOP processing ------------
:01-14 09:21:02.150014:    :0x1:-:<0,0,->:------------ Stage 6 EOP processing ------------
:01-14 09:21:02.150108:    :0x1:-:<0,0,->:------------ Stage 7 EOP processing ------------
:01-14 09:21:02.150129:    :0x1:-:<0,0,->:------------ Stage 8 EOP processing ------------
:01-14 09:21:02.150143:    :0x1:-:<0,0,->:------------ Stage 9 EOP processing ------------
:01-14 09:21:02.150192:    :0x1:-:<0,0,->:------------ Stage 10 EOP processing ------------
:01-14 09:21:02.150208:    :0x1:-:<0,0,->:------------ Stage 11 EOP processing ------------
[... irrelevant SwitchIngressDeparser log ...]

In the log, it seems that the switch first hits fwd_read and then fwd_write. However, in the P4 code, I apply fwd_write first and then fwd_read. Even if the tables execute in the order shown in the log, the set action is still ineffective and does not update the cnt register. I cannot tell what is happening on the HW. Are there any tools like GDB, or some output mechanism, that can be used to debug the P4 code directly on HW?

Thanks!

Dear @herano1999 ,

Thank you for the updated test. Unfortunately, even though you added the code to read the register cnt and the counter counter (BTW, all the counters you use are indirect and not direct as one might conclude from the name of the test) you did not add any calls to unittest.assertXXX() functions, so I can only guess what your expectations might be.

Anyhow, I can see several problems here, so let’s analyze what’s going on.

  1. The most serious problem is that the action do_write() failed to write the constant 32w2222 into the register cnt (which is why it is 0 in all pipes). You might not like the explanation, but there are two problems here:
    a. You are asking Tofino to do something it can’t, specifically to access the same Register() extern more than once for the same packet (first time in the action do_write() and second time – in the action do_read()).
    b. The compiler should’ve flagged the problem and refused to compile your program, but instead it produced incorrect code for it, forcing you to scratch your head. This is a compiler bug.
  2. You read the counter counter (entry 1) twice. It gets incremented (as expected), but you can see it only on the second read. This is a bug in your test – you should’ve waited a little longer. Usually this is done by ascertaining that the correct egress packet was sent first (or that no packets were sent) using PTF’s verify_xxx() functions, and checking the counters and registers afterwards. You can always insert a delay as a quick workaround and you’ll see the updated value of the counter on the first read.

Here are some findings/useful facts:

  1. Even though your test doesn’t check them, I did check the counters test and test_2 and they both got incremented as expected.
  2. The compiler bug that we discussed above produced another strange effect: the compiler decided that tables fwd_read and fwd_write do not have dependencies and put them into the same stage (0). Once such a decision is made, the order of these tables does not matter, which explains the “strange” order you saw in the logs.
  3. I do not know the details of your algorithm, but it is certainly possible to use a single RegisterAction() to both output the current value from the register and write a new one at the same time. In fact, the whole point of RegisterAction() extern is to overcome (relax) the “no more than one access per packet rule” – whatever you put into a single register action counts as one access. In general, a register action can read a value from the register, perform some operations, write a new value back (or leave the old unchanged) and optionally output something too.

Happy hacking,
Vladimir

Thank you for your thoughtful explanation! I tried to do some refactoring on the code to get rid of the twice-touched register, and now it works as expected! I will raise an issue at the p4c repo later.

Cheers!