From cf6b6b55dc34508b6d5e7b6801c381acb0152042 Mon Sep 17 00:00:00 2001 From: Tanat Boozayaangool Date: Mon, 9 Jan 2023 12:00:56 +0000 Subject: [PATCH] Add info in GPU Counter failures, misc fixes to validation (#1261) * Add fix for generic validator * Added more detailed error messages for counter checks --- .../gapid/views/DeviceValidationView.java | 7 +- gapis/trace/android/generic/validate.go | 2 +- gapis/trace/android/trace.go | 1 + gapis/trace/android/validate/validate.go | 89 ++++++++++++++----- 4 files changed, 72 insertions(+), 27 deletions(-) diff --git a/gapic/src/main/com/google/gapid/views/DeviceValidationView.java b/gapic/src/main/com/google/gapid/views/DeviceValidationView.java index aff0bc5a5..3a947d7f6 100644 --- a/gapic/src/main/com/google/gapid/views/DeviceValidationView.java +++ b/gapic/src/main/com/google/gapid/views/DeviceValidationView.java @@ -33,7 +33,6 @@ import com.google.gapid.models.Devices.DeviceValidationResult.ValidatorType; import com.google.gapid.models.Models; import com.google.gapid.proto.device.Device; -import com.google.gapid.proto.service.Service; import com.google.gapid.rpc.SingleInFlight; import com.google.gapid.util.Logging; import com.google.gapid.util.OS; @@ -219,9 +218,9 @@ private void displayResult(DeviceValidationResult result) { } // Separate error text from help text as a workaround to enable proper text wrapping. - errorMessageGroup = - withLayoutData( - createGroup(this, ""), withSpans(new GridData(SWT.FILL, SWT.TOP, true, false), 3, 1)); + GridData gridData = new GridData(SWT.FILL, SWT.TOP, true, false); + gridData.heightHint = 60; + errorMessageGroup = withLayoutData(createGroup(this, ""), withSpans(gridData, 3, 1)); Text errText = withLayoutData( createTextbox(errorMessageGroup, SWT.READ_ONLY | SWT.WRAP, result.toString()), diff --git a/gapis/trace/android/generic/validate.go b/gapis/trace/android/generic/validate.go index 1fa27050a..332daf38b 100644 --- a/gapis/trace/android/generic/validate.go +++ b/gapis/trace/android/generic/validate.go @@ -39,7 +39,7 @@ func NewGenericValidator(device bind.Device) *GenericValidator { } func counterChecker() validate.Checker { - return validate.And(validate.IsNumber, validate.CheckLargerThanZero()) + return validate.And(validate.IsNumber, validate.Not(validate.CheckAllEqualTo(0))) } func (v *GenericValidator) Validate(ctx context.Context, processor *perfetto.Processor) error { diff --git a/gapis/trace/android/trace.go b/gapis/trace/android/trace.go index f605cdacc..00b5db129 100644 --- a/gapis/trace/android/trace.go +++ b/gapis/trace/android/trace.go @@ -295,6 +295,7 @@ func (t *androidTracer) Validate(ctx context.Context, enableLocalFiles bool) (*s return } log.I(ctx, "Writing trace size %v bytes to %v", numWritten, file.Name()) + res.TracePath = file.Name() }) if traceLoadingErr != nil { return nil, traceLoadingErr diff --git a/gapis/trace/android/validate/validate.go b/gapis/trace/android/validate/validate.go index 3c2012fe6..5cf3dcd65 100644 --- a/gapis/trace/android/validate/validate.go +++ b/gapis/trace/android/validate/validate.go @@ -18,6 +18,8 @@ import ( "context" "fmt" "math" + "sort" + "strings" "github.com/google/gapid/core/log" "github.com/google/gapid/gapis/perfetto" @@ -26,7 +28,7 @@ import ( ) const ( - counterIDQuery = "select id from gpu_counter_track where name = '%v'" + counterIdQuery = "select id from gpu_counter_track where name = '%v'" counterValuesQuery = "" + "select value from counter " + "where track_id = %v order by ts " + @@ -159,40 +161,50 @@ func CheckAverageApproximateTo(num, margin float64) Checker { } // ValidateGpuCounters queries for the GPU counter from the trace and -// validates the value based on associated the requirement, +// validates the value based on the associated check, // up to the amount specified in passThreshold. func ValidateGpuCounters(ctx context.Context, processor *perfetto.Processor, counters []GpuCounter, passThreshold int) error { passCount := 0 var failedCounters []GpuCounter + var missingCounters []GpuCounter + counters = trimDuplicates(counters) + if passThreshold > len(counters) { + log.E(ctx, "Received passThreshold (%v) higher than amount of counters (%v)", passThreshold, len(counters)) + passThreshold = len(counters) + } + for _, counter := range counters { - queryResult, err := processor.Query(fmt.Sprintf(counterIDQuery, counter.Name)) + // Converts counter name to track table ID. + counterIdResult, err := processor.Query(fmt.Sprintf(counterIdQuery, counter.Name)) if err != nil { - return log.Errf(ctx, err, "Failed to query with %v", fmt.Sprintf(counterIDQuery, counter.Name)) - } - if len(queryResult.GetColumns()) != 1 { - return log.Errf(ctx, err, "Expected one result with query: %v", fmt.Sprintf(counterIDQuery, counter.Name)) + return log.Errf(ctx, err, "Failed to query with %v", fmt.Sprintf(counterIdQuery, counter.Name)) } - var counterID int64 - for _, column := range queryResult.GetColumns() { - longValues := column.GetLongValues() - if len(longValues) != 1 { - // This tends to happen when the device fails to report GPU counter values. - return log.Errf(ctx, nil, "Expected one result for %v but got %v", counter, len(longValues)) - } - counterID = longValues[0] - break + + // Ensure that there's one track table ID for the current counter. + counterIdValues := counterIdResult.GetColumns()[0].GetLongValues() + if len(counterIdValues) == 0 { + log.E(ctx, "Trace does not contain expected counter %v", counter) + missingCounters = append(missingCounters, counter) + continue + } else if len(counterIdValues) != 1 { + // We should never run into situation where there are more than 1 results. + return log.Errf(ctx, nil, "Found unexpected %v results for counter %v", len(counterIdValues), counter) } - queryResult, err = processor.Query(fmt.Sprintf(counterValuesQuery, counterID, sampleCounter)) + trackTableId := counterIdValues[0] + + valueQueryResult, err := processor.Query(fmt.Sprintf(counterValuesQuery, trackTableId, sampleCounter)) if err != nil { - return log.Errf(ctx, err, "Failed to query with %v for counter %v", fmt.Sprintf(counterValuesQuery, counterID), counter) + return log.Errf(ctx, err, "Failed to query with %v for counter %v", fmt.Sprintf(counterValuesQuery, trackTableId), counter) } // Query exactly #sampleCounter samples, fail if not enough samples - if queryResult.GetNumRecords() != sampleCounter { - return log.Errf(ctx, nil, "Number of samples is incorrect for counter: %v %v", counter, queryResult.GetNumRecords()) + if valueQueryResult.GetNumRecords() != sampleCounter { + log.E(ctx, "Expected %v number of samples for counter %v but got: %v", sampleCounter, counter, valueQueryResult.GetNumRecords()) + failedCounters = append(failedCounters, counter) + continue } - if counter.Check(queryResult.GetColumns()[0], queryResult.GetColumnDescriptors()[0].GetType()) { + if counter.Check(valueQueryResult.GetColumns()[0], valueQueryResult.GetColumnDescriptors()[0].GetType()) { passCount++ if passCount >= passThreshold { return nil @@ -201,7 +213,40 @@ func ValidateGpuCounters(ctx context.Context, processor *perfetto.Processor, cou failedCounters = append(failedCounters, counter) } } - return log.Errf(ctx, nil, "Check failed for counters: %v", failedCounters) + + var sb strings.Builder + sb.WriteString(fmt.Sprintf("Passed %v checks out of %v, expected to pass %v check(s). ", passCount, len(counters), passThreshold)) + + if len(missingCounters) > 0 { + sb.WriteString(fmt.Sprintf("Missing %v counter(s): %v\n", len(missingCounters), missingCounters)) + } + if len(failedCounters) > 0 { + sb.WriteString(fmt.Sprintf("Failed check for %v counter(s): %v", len(failedCounters), failedCounters)) + } + + return log.Errf(ctx, nil, sb.String()) +} + +// trimDuplicates removes duplicates from the GPU counter array. +func trimDuplicates(counters []GpuCounter) []GpuCounter { + duplicateMap := make(map[int]GpuCounter) + res := make([]GpuCounter, 0) + for _, counter := range counters { + duplicateMap[int(counter.Id)] = counter + } + + // Sort the ids to retrieve from the map. + ids := make([]int, 0, len(res)) + for id := range duplicateMap { + ids = append(ids, id) + } + sort.Ints(ids) + + for _, id := range ids { + res = append(res, duplicateMap[id]) + } + + return res } // GetTrackIDs returns all track ids from gpu_track with the given scope.