From 9e330b5c4fb2384977d5cad98c314c1366f49470 Mon Sep 17 00:00:00 2001 From: Stamatis Zampetakis Date: Thu, 14 Sep 2023 15:53:03 +0200 Subject: [PATCH] TEZ-4503: Warn about large conf properties in payload --- .../java/org/apache/tez/common/TezUtils.java | 23 +++++++++++++++++++ .../apache/tez/dag/api/TezConfiguration.java | 20 ++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/tez-api/src/main/java/org/apache/tez/common/TezUtils.java b/tez-api/src/main/java/org/apache/tez/common/TezUtils.java index 88920a4e53..cb6ff2bae4 100644 --- a/tez-api/src/main/java/org/apache/tez/common/TezUtils.java +++ b/tez-api/src/main/java/org/apache/tez/common/TezUtils.java @@ -29,6 +29,7 @@ import com.google.protobuf.ByteString; import com.google.protobuf.CodedInputStream; +import org.apache.tez.dag.api.TezConfiguration; import org.apache.tez.runtime.api.TaskContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,6 +44,11 @@ import org.xerial.snappy.SnappyInputStream; import org.xerial.snappy.SnappyOutputStream; +import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_MASK; +import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_MASK_DEFAULT; +import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD; +import static org.apache.tez.dag.api.TezConfiguration.TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD_DEFAULT; + /** * Utility methods for setting up a DAG. Has helpers for setting up log4j configuration, converting * {@link org.apache.hadoop.conf.Configuration} to {@link org.apache.tez.dag.api.UserPayload} etc. @@ -51,6 +57,14 @@ public final class TezUtils { private static final Logger LOG = LoggerFactory.getLogger(TezUtils.class); + private static final int PROPERTY_THRESHOLD; + private static final boolean PROPERTY_MASK; + + static { + TezConfiguration c = new TezConfiguration(); + PROPERTY_THRESHOLD = c.getInt(TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD, TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD_DEFAULT); + PROPERTY_MASK = c.getBoolean(TEZ_LOGGING_PROPERTY_MASK, TEZ_LOGGING_PROPERTY_MASK_DEFAULT); + } private TezUtils() {} @@ -211,10 +225,19 @@ public static void populateConfProtoFromEntries(Iterable PROPERTY_THRESHOLD) { + LOG.warn("Property '{}' is unusually big ({} bytes); large payload may lead to OOM.", key, value.length()); + if (!PROPERTY_MASK) { + LOG.warn("Large property '{}': {}", key, value); + } + } + } } diff --git a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java index 9e2e2d89cf..32b1a4ac9b 100644 --- a/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java +++ b/tez-api/src/main/java/org/apache/tez/dag/api/TezConfiguration.java @@ -1580,6 +1580,26 @@ public TezConfiguration(boolean loadDefaults) { TEZ_PREFIX + "generate.debug.artifacts"; public static final boolean TEZ_GENERATE_DEBUG_ARTIFACTS_DEFAULT = false; + /** + * Int value. Property size threshold (in bytes) for logging during payload serialization. Properties exceeding the + * threshold are considered unusually large and potentially problematic thus they should be logged. + */ + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty(type="integer") + public static final String TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD = + TEZ_PREFIX + "logging.property.size.threshold"; + public static final int TEZ_LOGGING_PROPERTY_SIZE_THRESHOLD_DEFAULT = 512 * 1024; + /** + * Boolean value. Whether property masking is enabled for logging. Properties may contain sensitive user information + * such as passwords, credentials, secrets, etc., so they shouldn't be logged unconditionally. When masking is + * enabled, the property value (content) is not displayed in the logs. + */ + @ConfigurationScope(Scope.VERTEX) + @ConfigurationProperty + public static final String TEZ_LOGGING_PROPERTY_MASK = + TEZ_PREFIX + "logging.property.mask"; + public static final boolean TEZ_LOGGING_PROPERTY_MASK_DEFAULT = true; + /** * Set of tasks for which specific launch command options need to be added. * Format: "vertexName[csv of task ids];vertexName[csv of task ids].."