Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/candidate-9.8.x'
Browse files Browse the repository at this point in the history
Signed-off-by: Jake Smith <jake.smith@lexisnexisrisk.com>
  • Loading branch information
jakesmith committed Jan 10, 2025
2 parents 908fd65 + 7008da4 commit b061d66
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 119 deletions.
35 changes: 31 additions & 4 deletions dali/base/dadfs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13837,21 +13837,48 @@ void configurePreferredPlanes()

static bool doesPhysicalMatchMeta(IPropertyTree &partProps, IFile &iFile, offset_t expectedSize, offset_t &actualSize)
{
constexpr unsigned delaySecs = 5;
// NB: temporary workaround for 'narrow' files publishing extra empty parts with the wrong @compressedSize(0)
// causing a new check introduced in HPCC-33064 to be hit (fixed in HPCC-33113, but will continue to affect existing files)
unsigned __int64 size = partProps.getPropInt64("@size", unknownFileSize);
unsigned __int64 compressedSize = partProps.getPropInt64("@compressedSize", unknownFileSize);
if ((0 == size) && (0 == compressedSize))
{
actualSize = unknownFileSize;
// either this is a file from 9.10 where empty compressed files can be zero length (no header)
// or it's a pre 9.10 (and pre HPCC-33133 fix) dummy part created with an incorrect @compressedSize of 0
// (also in future empty physical files may legitimately not exist)
// If file exists check that the size is either 0, or the compressed header size (the size of an empty compressed file pre 9.10)
actualSize = iFile.size();
if (unknownFileSize != actualSize) // file exists. NB: ok not to exist for future compatibility where 0-length files not written
{
if (0 != actualSize) // could be zero if file from >= 9.10
{
constexpr size32_t nonEmptyCompressedFileSize = 56; // min size of a non-empty compressed file (with header) - 56 bytes
if (nonEmptyCompressedFileSize != actualSize)
{
// in >= 9.8 - this could 1st check getWriteSyncMarginMs() and only check if not set.
WARNLOG("Empty compressed file %s's size (%" I64F "u) is not expected size of 0 or %u - retry after %u second delay", iFile.queryFilename(), actualSize, nonEmptyCompressedFileSize, delaySecs);
MilliSleep(delaySecs * 1000);
actualSize = iFile.size();
if ((0 != actualSize) && (nonEmptyCompressedFileSize != actualSize))
return false; // including if unknownFileSize - no longer exists!
}
}
}
return true;
}

if (expectedSize != unknownFileSize)
else if (expectedSize != unknownFileSize)
{
actualSize = iFile.size();
if (actualSize != expectedSize)
return false;
{
// in >= 9.8 - this could 1st check getWriteSyncMarginMs() and only check if not set.
WARNLOG("File %s's size (%" I64F "u) does not match meta size (%" I64F "u) - retry after %u second delay", iFile.queryFilename(), actualSize, expectedSize, delaySecs);
MilliSleep(delaySecs * 1000);
actualSize = iFile.size();
if (actualSize != expectedSize)
return false;
}
}
else
actualSize = unknownFileSize;
Expand Down
116 changes: 1 addition & 115 deletions testing/unittests/jlibtests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4936,121 +4936,6 @@ class JLibSecretsTest : public CppUnit::TestFixture

CPPUNIT_TEST_SUITE_REGISTRATION( JLibSecretsTest );
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( JLibSecretsTest, "JLibSecretsTest" );
class HashFuncTests : public CppUnit::TestFixture
{
public:
virtual void setUp() override
{
generateFixedRandomNullTermiantedStrings();
}

CPPUNIT_TEST_SUITE(HashFuncTests);
CPPUNIT_TEST(fnvTests);
CPPUNIT_TEST_SUITE_END();

protected:
static constexpr unsigned maxCalls = 10'000'000;
static constexpr unsigned minLen = 5, maxLen = 100;
static constexpr unsigned lenRange = maxLen - minLen + 1;
static constexpr unsigned randomSeed = 42;
static constexpr size_t testBufferSize = 1'000'000;
CCycleTimer timer;
std::vector<unsigned char> buffer;

unsigned getOffsetLenHash(unsigned offset, unsigned hash)
{
hash ^= (offset * 0x27D4EB2D); // use MurMurHash3 multiplier to introduce more randomness
hash *= fnvPrime32;
return hash;
}
void generateFixedRandomNullTermiantedStrings()
{
buffer.resize(testBufferSize);
std::mt19937 rng(randomSeed);
std::uniform_int_distribution<unsigned char> dist(1, 255);

unsigned offset = 0;
unsigned lenHash = fnvInitialHash32;
while (offset < testBufferSize)
{
// create str lengths between min and max based on offset,
// so that we can predictably read them back
lenHash = getOffsetLenHash(offset, lenHash);
unsigned len = (lenHash % lenRange) + minLen;

if (offset + len + 1 >= testBufferSize)
break;

for (unsigned i=0; i<len; i++)
buffer[offset + i] = dist(rng);
buffer[offset + len] = '\0';

offset += len + 1;
}
}
template <unsigned (*HASHCFUNC)(const unsigned char *, unsigned, unsigned)>
void testHashc()
{
unsigned hashResult = fnvInitialHash32;

unsigned offset = 0;
for (unsigned i=0; i<maxCalls; i++)
{
unsigned len = (i % 100) + 5;
if (offset + len > buffer.size())
offset = 0;

hashResult ^= HASHCFUNC(&buffer[offset], len, hashResult);
offset += len;
}
CPPUNIT_ASSERT(hashResult != 0);
}
template <unsigned (*HASHCZFUNC)(const unsigned char *, unsigned)>
void testHashcz()
{
unsigned hashResult = fnvInitialHash32;

unsigned lenHash = fnvInitialHash32;
unsigned offset = 0;
for (unsigned i=0; i<maxCalls; i++)
{
// get next length, as populated by generate function
lenHash = getOffsetLenHash(offset, lenHash);
unsigned len = (lenHash % lenRange) + minLen;

if (offset + (len + 1) > buffer.size())
{
offset = 0;
lenHash = getOffsetLenHash(offset, fnvInitialHash32);
len = (lenHash % lenRange) + minLen;
}
dbgassertex(len == strlen((const char *)&buffer[offset]));
hashResult ^= HASHCZFUNC(&buffer[offset], hashResult);
offset += len + 1;
}
CPPUNIT_ASSERT(hashResult != 0);
}
void measure(const char *funcName, const std::function<void(void)> &func)
{
timer.reset();
func();
unsigned elapsed = timer.elapsedMs();
double throughput = static_cast<double>(maxCalls) / elapsed * 1000;
PROGLOG("%s: %u calls took %u ms (%.2f hashes/sec)", funcName, maxCalls, elapsed, throughput);
}
void fnvTests()
{
measure("deprecatedHashc (fnv1)", [this]() { testHashc<deprecatedHashc>(); });
measure("deprecatedHashcz (fnv1)", [this]() { testHashcz<deprecatedHashcz>(); });
measure("hashc (fnv1)", [this]() { testHashc<hashc>(); });
measure("hashcz (fnv1)", [this]() { testHashcz<hashcz>(); });
measure("hashc_fnv1a", [this]() { testHashc<hashc_fnv1a>(); });
measure("hashcz_fnv1a", [this]() { testHashcz<hashcz_fnv1a>(); });
}
};

// Register the HashFuncTests fixture with CppUnit twice: once in the default
// registry, and once in a registry named "HashFuncTests" (presumably so the
// suite can be selected by name from the test runner - confirm against runner).
CPPUNIT_TEST_SUITE_REGISTRATION( HashFuncTests );
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( HashFuncTests, "HashFuncTests" );

class JLibStringTest : public CppUnit::TestFixture
{
Expand Down Expand Up @@ -5323,6 +5208,7 @@ class getaddrinfotest : public CppUnit::TestFixture

CPPUNIT_TEST_SUITE_REGISTRATION( getaddrinfotest );
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( getaddrinfotest, "getaddrinfotest" );

class HashFuncTests : public CppUnit::TestFixture
{
public:
Expand Down

0 comments on commit b061d66

Please sign in to comment.