Skip to content

Commit

Permalink
HPCC-30917 Avoid core when eclagent terminates due to span destruction
Browse files Browse the repository at this point in the history
Signed-off-by: Gavin Halliday <gavin.halliday@lexisnexis.com>
  • Loading branch information
ghalliday committed Nov 27, 2023
1 parent f9eb573 commit d3c4362
Show file tree
Hide file tree
Showing 15 changed files with 103 additions and 19 deletions.
4 changes: 4 additions & 0 deletions common/thorhelper/thorcommon.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,10 @@ class CStatsContextLogger : public CSimpleInterfaceOf<IContextLogger>
{
return traceLevel;
}
virtual ISpan * queryActiveSpan() const override
{
return activeSpan;
}
virtual void setActiveSpan(ISpan * span) override
{
activeSpan.set(span);
Expand Down
2 changes: 1 addition & 1 deletion ecl/eclagent/eclagent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2132,7 +2132,7 @@ void EclAgent::runProcess(IEclProcess *process)

Owned<IProperties> traceHeaders = extractTraceDebugOptions(queryWorkUnit());
Owned<ISpan> requestSpan = queryTraceManager().createServerSpan(queryWorkUnit()->queryWuid(), traceHeaders);
updateDummyContextLogger().setActiveSpan(requestSpan);
ContextSpanScope spanScope(updateDummyContextLogger(), requestSpan);

// a component may specify an alternate name for the agent/workflow memory area,
// e.g. Thor specifies in "eclAgentMemory"
Expand Down
24 changes: 15 additions & 9 deletions esp/bindings/http/platform/httpservice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,18 +377,24 @@ int CEspHttpServer::processRequest()

checkSetCORSAllowOrigin(thebinding, m_request, m_response);

if (thebinding!=NULL)
if (thebinding && thebinding->isUnrestrictedSSType(stype))
{
if (thebinding->isUnrestrictedSSType(stype))
{
thebinding->onGetUnrestricted(m_request.get(), m_response.get(), serviceName.str(), methodName.str(), stype);
ctx->addTraceSummaryTimeStamp(LogMin, "handleHttp");
return 0;
}
//Avoid creating a span for unrestrictedSSType requests
thebinding->onGetUnrestricted(m_request.get(), m_response.get(), serviceName.str(), methodName.str(), stype);
ctx->addTraceSummaryTimeStamp(LogMin, "handleHttp");
return 0;
}

//Avoids unrestrictedSSType requests
m_request->startSpan();
//The context will be destroyed when this request is destroyed. So rather than using
//EspContextSpanScope spanContext(*ctx, serverSpan);
//which would remove the activeSpan when this function exits, use
//setActiveSpan()
//It is possible that using EspContextSpanScope would be better
Owned<ISpan> serverSpan = m_request->createServerSpan();
ctx->setActiveSpan(serverSpan);

if (thebinding!=NULL)
{
if(stricmp(method.str(), POST_METHOD)==0)
thebinding->handleHttpPost(m_request.get(), m_response.get());
else if(!stricmp(method.str(), GET_METHOD))
Expand Down
5 changes: 2 additions & 3 deletions esp/bindings/http/platform/httptransport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1915,12 +1915,11 @@ int CHttpRequest::receive(IMultiException *me)
return 0;
}

void CHttpRequest::startSpan()
ISpan * CHttpRequest::createServerSpan()
{
//MORE: The previous code would be better off querying httpHeaders...
Owned<IProperties> httpHeaders = getHeadersAsProperties(m_headers);
Owned<ISpan> requestSpan = queryTraceManager().createServerSpan("HTTPRequest", httpHeaders, SpanFlags::EnsureGlobalId);
m_context->setActiveSpan(requestSpan);
return queryTraceManager().createServerSpan("HTTPRequest", httpHeaders, SpanFlags::EnsureGlobalId);
}

void CHttpRequest::annotateSpan(const char * key, const char * value)
Expand Down
2 changes: 1 addition & 1 deletion esp/bindings/http/platform/httptransport.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ public:

virtual int receive(IMultiException *me);

void startSpan();
ISpan * createServerSpan();
void updateContext();
void annotateSpan(const char * key, const char * value);

Expand Down
4 changes: 3 additions & 1 deletion esp/platform/esp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,13 +235,15 @@ interface IEspContext : extends IInterface
virtual void setRequest(IHttpMessage* req) = 0;
virtual IHttpMessage* queryRequest() = 0;

virtual void setActiveSpan(ISpan * span)=0;
virtual void setActiveSpan(ISpan * span)=0; // Only call this function directly if this object's lifetime matches the lifetime of the span. If there is any doubt use EspContextSpanScope(ctx, span)
virtual ISpan * queryActiveSpan() const = 0;
virtual IProperties * getClientSpanHeaders() const = 0;
virtual const char* getGlobalId() const = 0;
virtual const char* getCallerId() const = 0;
virtual const char* getLocalId() const = 0;
};
//Class for managing the active span in the context
using EspContextSpanScope = ContextSpanScopeImp<IEspContext>;


typedef unsigned LogLevel;
Expand Down
4 changes: 4 additions & 0 deletions roxie/ccd/ccd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,10 @@ class ContextLogger : implements IRoxieContextLogger, public CInterface
{
stats.reset();
}
virtual ISpan * queryActiveSpan() const override
{
return activeSpan;
}
virtual void setActiveSpan(ISpan * span) override
{
activeSpan.set(span);
Expand Down
4 changes: 4 additions & 0 deletions roxie/ccd/ccdcontext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1382,6 +1382,10 @@ class CRoxieContextBase : implements IRoxieAgentContext, implements ICodeContext
{
return logctx.queryTraceLevel();
}
virtual ISpan * queryActiveSpan() const override
{
return logctx.queryActiveSpan();
}
virtual void setActiveSpan(ISpan * span) override
{
const_cast<IRoxieContextLogger&>(logctx).setActiveSpan(span);
Expand Down
3 changes: 2 additions & 1 deletion roxie/ccd/ccdlistener.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1180,7 +1180,7 @@ class RoxieWorkUnitWorker : public RoxieQueryWorker

Owned<IProperties> traceHeaders = extractTraceDebugOptions(wu);
Owned<ISpan> requestSpan = queryTraceManager().createServerSpan(wu->queryWuid(), traceHeaders);
logctx->setActiveSpan(requestSpan);
ContextSpanScope spanScope(*logctx, requestSpan);

Owned<IQueryFactory> queryFactory;
try
Expand Down Expand Up @@ -1456,6 +1456,7 @@ class RoxieProtocolMsgContext : implements IHpccProtocolMsgContext, public CInte
ensureContextLogger();

Owned<ISpan> requestSpan = queryTraceManager().createServerSpan("request", allHeaders, flags);
//The span has a lifetime the same length as the logctx, so no need to restore it at the end of the query
logctx->setActiveSpan(requestSpan);

const char * globalId = requestSpan->queryGlobalId();
Expand Down
8 changes: 8 additions & 0 deletions roxie/ccd/ccdserver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,10 @@ class IndirectAgentContext : implements IRoxieAgentContext, public CInterface
{
return ctx->isBlind();
}
virtual ISpan * queryActiveSpan() const override
{
return ctx->queryActiveSpan();
}
virtual void setActiveSpan(ISpan * span) override
{
ctx->setActiveSpan(span);
Expand Down Expand Up @@ -1351,6 +1355,10 @@ class CRoxieServerActivity : implements CInterfaceOf<IRoxieServerActivity>, impl
return traceLevel;
}

virtual ISpan * queryActiveSpan() const override
{
return ctx->queryActiveSpan();
}
virtual void setActiveSpan(ISpan * span) override
{
if (ctx)
Expand Down
4 changes: 4 additions & 0 deletions system/jlib/jlog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2868,6 +2868,10 @@ class DummyLogCtx : implements IContextLogger
{
return 0;
}
virtual ISpan * queryActiveSpan() const override
{
return activeSpan;
}
virtual void setActiveSpan(ISpan * span) override
{
activeSpan.set(span);
Expand Down
6 changes: 5 additions & 1 deletion system/jlib/jlog.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1271,12 +1271,16 @@ interface jlib_decl IContextLogger : extends IInterface
virtual const char *queryLocalId() const = 0;
virtual const char *queryCallerId() const = 0;
virtual const CRuntimeStatisticCollection & queryStats() const = 0;
virtual void setActiveSpan(ISpan * span) = 0;
virtual ISpan * queryActiveSpan() const = 0;
virtual void setActiveSpan(ISpan * span) = 0; // Only call this function directly if this object's lifetime matches the lifetime of the span. If there is any doubt use ContextSpanScope(ctx, span)
virtual IProperties * getClientHeaders() const = 0;
virtual void recordStatistics(IStatisticGatherer &progress) const = 0;
virtual const LogMsgJobInfo & queryJob() const { return unknownJob; }
};

using ContextSpanScope = ContextSpanScopeImp<IContextLogger>;


extern jlib_decl StringBuffer &appendGloballyUniqueId(StringBuffer &s);

extern jlib_decl const IContextLogger &queryDummyContextLogger();
Expand Down
48 changes: 48 additions & 0 deletions system/jlib/jtrace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,54 @@ extern jlib_decl ISpan * getNullSpan();
extern jlib_decl void initTraceManager(const char * componentName, const IPropertyTree * componentConfig, const IPropertyTree * globalConfig);
extern jlib_decl ITraceManager & queryTraceManager();

//The following class is responsible for ensuring that the active span is restored in a context when the scope is exited
//Use a template class so it can be reused for IContextLogger and IEspContext
template <class CONTEXT>
class ContextSpanScopeImp
{
public:
ContextSpanScopeImp(CONTEXT & _ctx, ISpan * span)
: ctx(_ctx)
{
prev.set(ctx.queryActiveSpan());
ctx.setActiveSpan(span);
}
~ContextSpanScopeImp()
{
ctx.setActiveSpan(prev);
}

protected:
CONTEXT & ctx;
Owned<ISpan> prev;
};

// A variant of the class above that allows startSpan to be called after construction
template <class CONTEXT>
class DynamicContextSpanScopeImp
{
public:
~DynamicContextSpanScopeImp()
{
finishSpan();
}
void startSpan(CONTEXT & _ctx, ISpan * span)
{
ctx = &_ctx;
prev.set(ctx->queryActiveSpan());
ctx->setActiveSpan(span);
}
void finishSpan()
{
if (ctx)
ctx->setActiveSpan(prev);
}

protected:
CONTEXT * ctx{nullptr};
Owned<ISpan> prev;
};

/*
To use feature-level tracing flags, protect the tracing with a test such as:

Expand Down
2 changes: 1 addition & 1 deletion thorlcr/graph/thgraphmaster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1486,7 +1486,7 @@ CJobMaster::CJobMaster(IConstWorkUnit &_workunit, const char *graphName, ILoaded

Owned<IProperties> traceHeaders = extractTraceDebugOptions(workunit);
Owned<ISpan> requestSpan = queryTraceManager().createServerSpan(workunit->queryWuid(), traceHeaders);
logctx->setActiveSpan(requestSpan);
ContextSpanScope spanScope(*logctx, requestSpan);

resumed = WUActionResume == workunit->getAction();
fatalHandler.setown(new CFatalHandler(globals->getPropInt("@fatal_timeout", FATAL_TIMEOUT)));
Expand Down
2 changes: 1 addition & 1 deletion thorlcr/graph/thgraphslave.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1681,7 +1681,7 @@ CJobSlave::CJobSlave(ISlaveWatchdog *_watchdog, IPropertyTree *_workUnitInfo, co

Owned<IProperties> traceHeaders = deserializeTraceDebugOptions(workUnitInfo->queryPropTree("Debug"));
Owned<ISpan> requestSpan = queryTraceManager().createServerSpan(wuid, traceHeaders);
logctx->setActiveSpan(requestSpan);
ContextSpanScope spanScope(*logctx, requestSpan);

oldNodeCacheMem = 0;
slavemptag = _slavemptag;
Expand Down

0 comments on commit d3c4362

Please sign in to comment.