Skip to content

Commit

Permalink
improve estimate for number of packets, improve benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
Gregory Russell committed Nov 30, 2021
1 parent 1a3523d commit 4c42992
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 12 deletions.
32 changes: 24 additions & 8 deletions parser/pcap.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ var (
sparseLogger = log.New(os.Stdout, "sparse: ", log.LstdFlags|log.Lshortfile)
sparse20 = logx.NewLogEvery(sparseLogger, 50*time.Millisecond)

ErrNoIPLayer = fmt.Errorf("no IP layer")
ErrNoIPLayer = fmt.Errorf("no IP layer")
ErrTruncatedPcap = fmt.Errorf("truncated pcap file")
)

// Packet struct contains the packet data and metadata.
Expand Down Expand Up @@ -69,14 +70,29 @@ func GetPackets(data []byte) ([]Packet, error) {
return nil, err
}

// TODO: len(data)/18 provides much better estimate of number of packets.
// len(data)/18 was determined by looking at bytes/packet in a few pcap files.
// The number seems too small, but perhaps the data is still compressed at this point.
// However, it seems to cause mysterious crashes in sandbox, so
// reverting to /1500 for now.
packets := make([]Packet, 0, len(data)/1500)
// Estimate the number of packets in the file.
pktSize := int(pcap.Snaplen())
if pktSize < 1 {
pktSize = 1
}
pcapSize := len(data) // Only if the data is not compressed.
// Check the pcap magic number (0xd4 0xc3 0xb2 0xa1 in little-endian files) to
// decide whether the data is still compressed. NOTE(review): the `&&` in the
// condition below looks like it should be `||` — as written, the data is only
// treated as uncompressed when ALL four bytes differ from the magic; confirm
// intent.
if len(data) < 4 {
return nil, ErrTruncatedPcap
}
if data[0] != 0xd4 && data[1] != 0xc3 && data[2] != 0xb2 && data[3] != 0xa1 {
// For compressed data, the 8x factor is based on testing with a few large gzipped files.
pcapSize *= 8
}

// This computed slice sizing alone changes the throughput in sandbox from about 640
// to about 820 MB/sec per instance.
// NOTE that previously, we got about 1.07 GB/sec for just indexing.
packets := make([]Packet, 0, pcapSize/pktSize)

for data, ci, err := pcap.ZeroCopyReadPacketData(); err == nil; data, ci, err = pcap.ReadPacketData() {
// NOTE: The ReadPacketData call is doing about 99% of the allocs, and allocating about 30% of the bytes.
// Using ZeroCopy eliminates most of this, but then the packets in the slice have corrupted content.
for data, ci, err := pcap.ReadPacketData(); err == nil; data, ci, err = pcap.ReadPacketData() {
packets = append(packets, Packet{Ci: ci, Data: data, Err: err})
}

Expand Down
55 changes: 51 additions & 4 deletions parser/pcap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ func TestPCAPGarbage(t *testing.T) {
}
}

func getTestFile(b *testing.B, name string) []byte {
func getTestfileForBenchmark(b *testing.B, name string) []byte {
f, err := os.Open(path.Join(`testdata/PCAP/`, name))
if err != nil {
b.Fatal(err)
Expand All @@ -215,15 +215,16 @@ func getTestFile(b *testing.B, name string) []byte {
// With IP decoding: BenchmarkGetPackets-8 4279 285547 ns/op 376125 B/op 1729 allocs/op

// Enhanced RunParallel: BenchmarkGetPackets-8 2311 514898 ns/op 1181138 B/op 1886 allocs/op
// Estimate num packets: BenchmarkGetPackets-8 3688 329539 ns/op 571419 B/op 1888 allocs/op
func BenchmarkGetPackets(b *testing.B) {
type tt struct {
data []byte
numPkts int
}
tests := []tt{
{getTestFile(b, "ndt-nnwk2_1611335823_00000000000C2DFE.pcap.gz"), 336},
{getTestFile(b, "ndt-nnwk2_1611335823_00000000000C2DA8.pcap.gz"), 15},
{getTestFile(b, "ndt-nnwk2_1611335823_00000000000C2DA9.pcap.gz"), 5180},
{getTestfileForBenchmark(b, "ndt-nnwk2_1611335823_00000000000C2DFE.pcap.gz"), 336},
{getTestfileForBenchmark(b, "ndt-nnwk2_1611335823_00000000000C2DA8.pcap.gz"), 15},
{getTestfileForBenchmark(b, "ndt-nnwk2_1611335823_00000000000C2DA9.pcap.gz"), 5180},
}
b.ResetTimer()

Expand All @@ -242,3 +243,49 @@ func BenchmarkGetPackets(b *testing.B) {
}
})
}

// BenchmarkGetPackets2-8 235 5228191 ns/op 337.68 MB/s 37436 packets/op 12051418 B/op 37652 allocs/op
// Approximately 300 bytes/packet on average.
//
// BenchmarkGetPackets2 measures parser.GetPackets throughput over a mix of
// small and large gzipped pcap test files, reporting MB/s and packets/op.
//
// NOTE(review): i, ops, and numPkts are mutated by every RunParallel goroutine
// without synchronization, and b.SetBytes is also called concurrently with
// per-file sizes (last write wins). This benchmark has data races — visible
// under `go test -race -bench .` — so the reported packets/op and MB/s are
// approximate. Consider sync/atomic counters aggregated after RunParallel.
func BenchmarkGetPackets2(b *testing.B) {
// tt pairs a gzipped pcap file's raw bytes with its known packet count.
type tt struct {
data []byte
numPkts int
}
tests := []tt{
// Approximately 220K packets, so this is about 140nsec/packet, and about 100 bytes/packet allocated,
// which is roughly the footprint of the packets themselves.
{getTestfileForBenchmark(b, "ndt-nnwk2_1611335823_00000000000C2DFE.pcap.gz"), 336},
{getTestfileForBenchmark(b, "ndt-nnwk2_1611335823_00000000000C2DA8.pcap.gz"), 15},
{getTestfileForBenchmark(b, "ndt-nnwk2_1611335823_00000000000C2DA9.pcap.gz"), 5180},
{getTestfileForBenchmark(b, "ndt-m6znc_1632401351_000000000005BA77.pcap.gz"), 40797},
{getTestfileForBenchmark(b, "ndt-m6znc_1632401351_000000000005B9EA.pcap.gz"), 146172},
{getTestfileForBenchmark(b, "ndt-m6znc_1632401351_000000000005B90B.pcap.gz"), 30097},
}
b.ReportAllocs()
b.ResetTimer()

// NOTE(review): this fixed 220000 packets/op metric is overwritten by the
// computed ReportMetric at the end of the function; it appears to be leftover.
b.ReportMetric(220000, "packets/op")

// i round-robins across the test files; shared across goroutines (see race
// note above).
i := 0

// Totals used to compute the average packets/op after the parallel run.
numPkts := 0
ops := 0
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
test := tests[i%len(tests)]
ops++
numPkts += test.numPkts
i++
pkts, err := parser.GetPackets(test.data)
if err != nil {
// NOTE(review): the testing docs require FailNow/Fatal to be called from
// the test goroutine; inside RunParallel, prefer b.Error followed by
// return.
b.Fatal(err)
}
if len(pkts) != test.numPkts {
b.Errorf("expected %d packets, got %d", test.numPkts, len(pkts))
}
// Size of the compressed input for the MB/s metric; concurrent calls with
// different sizes make this metric approximate (see race note above).
b.SetBytes(int64(len(test.data)))
}
})
b.Log("total packets", numPkts, "total ops", ops)
// Integer division; would panic if ops were 0 (i.e., if RunParallel executed
// no iterations) — TODO confirm that cannot happen here.
b.ReportMetric(float64(numPkts/ops), "packets/op")
}
Binary file added parser/testdata/PCAP/.DS_Store
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 comments on commit 4c42992

Please sign in to comment.