diff --git a/VERSION b/VERSION index f90b1af..0bee604 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.3.2 +2.3.3 diff --git a/lib/NGT/NGTQ/Quantizer.h b/lib/NGT/NGTQ/Quantizer.h index aee5fd8..e934aca 100644 --- a/lib/NGT/NGTQ/Quantizer.h +++ b/lib/NGT/NGTQ/Quantizer.h @@ -1287,73 +1287,6 @@ class ScalarQuantizedInt8ObjectProcessingStream { const std::type_info* dataTypeInfo; }; -class ScalarQuantizedUint8TransposedObjectProcessingStream : public ScalarQuantizedInt8ObjectProcessingStream { - public: - ScalarQuantizedUint8TransposedObjectProcessingStream(size_t dimension, size_t nOfObjects, NGTQ::Quantizer &q): - ScalarQuantizedInt8ObjectProcessingStream(q) { - initialize(dimension, nOfObjects); - stream = new uint8_t[streamSize](); - } - - ScalarQuantizedUint8TransposedObjectProcessingStream(size_t dim): - ScalarQuantizedInt8ObjectProcessingStream(*reinterpret_cast(0)) { - initialize(dim, 0); - stream = 0; - } - - void initialize(size_t dim, size_t nOfObjects) { - dimension = dim; - numOfObjects = nOfObjects; - smallBlockSize = 16; - blockSize = 64; - lineSize = blockSize * nOfObjects; - setStreamSize(); - } - - void arrangeObject(size_t dataNo, std::vector &object, float scale, float offset, bool shift) { - if (dataNo >= numOfObjects) { - std::stringstream msg; - msg << "The data index is out of the range. " << dataNo << ":" << numOfObjects; - NGTThrowException(msg); - } - NGT::ObjectSpace::quantizeToQint8(object, *dataTypeInfo, dimension, offset, scale, shift); - size_t endOfBlock = ((object.size() - 1) / blockSize) * blockSize; - for (size_t i = 0; i < endOfBlock; i++) { - size_t idx = lineSize * (i / blockSize) + blockSize * dataNo + i % blockSize; - if (idx >= streamSize) { - std::stringstream msg; - msg << "Fatal inner error! (1) " << idx << ":" << streamSize << " " - << dataNo << ":" << i << "/" << object.size(); - NGTThrowException(msg); - } - stream[idx] = static_cast(object[i]); - } - for (size_t i = endOfBlock; i < object.size(); i++) { - size_t idx = lineSize * (i / blockSize) + (i - endOfBlock) / smallBlockSize * (smallBlockSize * numOfObjects) + smallBlockSize * dataNo + (i - endOfBlock) % smallBlockSize; - if (idx >= streamSize) { - std::stringstream msg; - msg << "Fatal inner error! (2) " << idx << ":" << streamSize << " " - << dataNo << ":" << i << "/" << object.size(); - NGTThrowException(msg); - } - stream[idx] = static_cast(object[i]); - } - } - - void setStreamSize() { - streamSize = ((dimension - 1) / smallBlockSize + 1) * smallBlockSize * numOfObjects; - return; - } - - size_t getStreamSize(size_t nOfObjects) { - numOfObjects = nOfObjects; - setStreamSize(); - return streamSize; - } - size_t smallBlockSize; - size_t blockSize; - size_t lineSize; -}; ///// class QuantizedObjectDistance { @@ -3048,148 +2981,6 @@ class ScalarQuantizedInt8ObjectDistance : public NonLocalQuantizedObjectDistance }; -class ScalarQuantizedUint8TransposedObjectDistance : public ScalarQuantizedInt8ObjectDistance { - public: - ScalarQuantizedUint8TransposedObjectDistance(Quantizer &q, DistanceType dtype):ScalarQuantizedInt8ObjectDistance(q, dtype) {} - ~ScalarQuantizedUint8TransposedObjectDistance() {} - -#ifdef NGTQBG_MIN - inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query) { -#else - inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query) { -#endif - if (query == 0) { - NGTThrowException("Fatal inner error! The specified query is invalid.."); - } - const uint8_t *object = static_cast(inv); - const uint8_t *qobject = static_cast(query); - const unsigned char *last = qobject + dimension; - __m256i sum256[noOfObjects]; -#if defined(NGT_AVX512) - { - __m512i sum512[noOfObjects]; - for (size_t oi = 0; oi < noOfObjects; oi++) { - sum512[oi] = _mm512_setzero_si512(); - } - const unsigned char *lastgroup = last - 63; - while (qobject < lastgroup) { - __m512i q = _mm512_loadu_si512(reinterpret_cast(qobject)); - for (size_t oi = 0; oi < noOfObjects; oi++) { - __m512i o = _mm512_loadu_si512(reinterpret_cast(object)); - __mmask64 m = _mm512_cmplt_epu8_mask(q, o); - __m512i x = _mm512_add_epi8(_mm512_maskz_subs_epu8(m, o, q), - _mm512_maskz_subs_epu8(~m, q, o)); - __m512i xi16 = _mm512_cvtepu8_epi16(_mm512_extracti32x8_epi32(x,0)); - sum512[oi] = _mm512_add_epi32(sum512[oi], _mm512_madd_epi16(xi16, xi16)); - xi16 = _mm512_cvtepu8_epi16(_mm512_extracti32x8_epi32(x,1)); - sum512[oi] = _mm512_add_epi32(sum512[oi], _mm512_madd_epi16(xi16, xi16)); - object += 64; - } - qobject += 64; - } - for (size_t oi = 0; oi < noOfObjects; oi++) { - sum256[oi] = _mm256_add_epi32(_mm512_extracti32x8_epi32(sum512[oi], 0), - _mm512_extracti32x8_epi32(sum512[oi], 1)); - } - } -#elif defined(NGT_AVX2) - { - for (size_t oi = 0; oi < noOfObjects; oi++) { - sum256[oi] = _mm256_setzero_si256(); - } - //__attribute__((aligned(32))) uint16_t iv[16]; - const unsigned char *lastgroup = last - 63; - while (qobject < lastgroup) { - //std::cerr << "qobj=" << (size_t)qobject << ":" << (size_t)lastgroup << std::endl; - __m256i q[4]; - q[0] = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)qobject)); - qobject += 16; - q[1] = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)qobject)); - qobject += 16; - q[2] = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)qobject)); - qobject += 16; - q[3] = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)qobject)); - qobject += 16; - for (size_t oi = 0; oi < noOfObjects; oi++) { - __m256i o = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)object)); - __m256i sub16 = _mm256_subs_epi16(q[0], o); - sum256[oi] = _mm256_add_epi32(sum256[oi], _mm256_madd_epi16(sub16, sub16)); - object += 16; - o = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)object)); - sub16 = _mm256_subs_epi16(q[1], o); - sum256[oi] = _mm256_add_epi32(sum256[oi], _mm256_madd_epi16(sub16, sub16)); - object += 16; - o = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)object)); - sub16 = _mm256_subs_epi16(q[2], o); - sum256[oi] = _mm256_add_epi32(sum256[oi], _mm256_madd_epi16(sub16, sub16)); - object += 16; - o = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)object)); - sub16 = _mm256_subs_epi16(q[3], o); - sum256[oi] = _mm256_add_epi32(sum256[oi], _mm256_madd_epi16(sub16, sub16)); - object += 16; - } - } - } -#endif - { - //__attribute__((aligned(32))) uint16_t iv[16]; - const unsigned char *lastgroup = last - 15; - while (qobject < lastgroup) { - __m256i q = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)qobject)); - for (size_t oi = 0; oi < noOfObjects; oi++) { - __m256i o = _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const*)object)); - __m256i sub16 = _mm256_subs_epi16(q, o); - sum256[oi] = _mm256_add_epi32(sum256[oi], _mm256_madd_epi16(sub16, sub16)); - object += 16; - } - qobject += 16; - } - } -#ifdef NGTQBG_MIN - float min = std::numeric_limits::max(); -#endif - const __m256i value0 = _mm256_set1_epi32(0); - for (size_t oi = 0; oi < noOfObjects; oi++) { - __m256i tmp1 = _mm256_hadd_epi32(sum256[oi], value0); - __m256i tmp2 = _mm256_hadd_epi32(tmp1, value0); - distances[oi] = _mm256_extract_epi32(tmp2, 0) + _mm256_extract_epi32(tmp2, 4); - if (distances[oi] < min) { - min = distances[oi]; - } - } -#ifdef NGTQBG_MIN - return min; -#endif - } - -#ifdef NGTQBG_MIN - inline float operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector &queryList) { - return 0.0; -#else - inline void operator()(void *inv, float *distances, size_t noOfObjects, DistanceLookupTableUint8 &distanceLUT, void *query, std::vector &queryList) { -#endif - } - - uint8_t *generateRearrangedObjects(NGTQ::InvertedIndexEntry &invertedIndexObjects) { - if (invertedIndexObjects.numOfSubvectors != localDivisionNo) { - std::stringstream msg; - msg << "Internal fatal error. Invalid # of subvectos. " << invertedIndexObjects.numOfSubvectors << ":" << localDivisionNo; - NGTThrowException(msg); - } - ScalarQuantizedUint8TransposedObjectProcessingStream processingStream(invertedIndexObjects.numOfSubvectors, invertedIndexObjects.size(), quantizer); - processingStream.arrange(invertedIndexObjects); - return processingStream.getStream(); - } - void restoreIntoInvertedIndex(NGTQ::InvertedIndexEntry &invertedIndexObjects, - size_t numOfSubspaces, std::vector &ids, void *objects) { - NGTThrowException("not implemented"); - } - size_t getSizeOfCluster(size_t noOfObjects) { - ScalarQuantizedUint8TransposedObjectProcessingStream processingStream(localDivisionNo); - return processingStream.getStreamSize(noOfObjects);; - } - -}; #endif