Skip to content

Commit

Permalink
Bind out crc64 (#597)
Browse files Browse the repository at this point in the history
  • Loading branch information
DmitriyMusatkin authored Sep 13, 2024
1 parent 2dae492 commit 1a20c4e
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 14 deletions.
9 changes: 9 additions & 0 deletions awscrt/checksums.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,12 @@ def crc32c(input: bytes, previous_crc32c: int = 0) -> int:
Returns an unsigned 32-bit integer.
"""
return _awscrt.checksums_crc32c(input, previous_crc32c)


def crc64nvme(input: bytes, previous_crc64nvme: int = 0) -> int:
    """
    Compute the CRC64 NVME checksum of `input`.

    To extend a running checksum over several chunks, pass the value returned
    for the preceding chunk as `previous_crc64nvme`.
    The result is an unsigned 64-bit integer.
    """
    checksum = _awscrt.checksums_crc64nvme(input, previous_crc64nvme)
    return checksum
1 change: 1 addition & 0 deletions source/checksums.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@

PyObject *aws_py_checksums_crc32(PyObject *self, PyObject *args);
PyObject *aws_py_checksums_crc32c(PyObject *self, PyObject *args);
PyObject *aws_py_checksums_crc64nvme(PyObject *self, PyObject *args);

#endif /* AWS_CRT_PYTHON_CHECKSUMS_H */
59 changes: 47 additions & 12 deletions source/crc.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

#include "aws/checksums/crc.h"
#include "aws/common/byte_buf.h"
PyObject *checksums_crc_common(PyObject *args, uint32_t (*checksum_fn)(const uint8_t *, int, uint32_t)) {
PyObject *checksums_crc32_common(PyObject *args, uint32_t (*checksum_fn)(const uint8_t *, size_t, uint32_t)) {
Py_buffer input;
PyObject *py_previousCrc;
PyObject *py_result = NULL;
Expand Down Expand Up @@ -39,18 +39,11 @@ PyObject *checksums_crc_common(PyObject *args, uint32_t (*checksum_fn)(const uin

/* clang-format off */
Py_BEGIN_ALLOW_THREADS
/* Avoid truncation of length for very large buffers. crc() takes
length as an int, which may be narrower than Py_ssize_t. */
while ((size_t)len > INT_MAX) {
val = checksum_fn(buf, INT_MAX, val);
buf += (size_t)INT_MAX;
len -= (size_t)INT_MAX;
}
val = checksum_fn(buf, (int)len, val);
val = checksum_fn(buf, (size_t)len, val);
Py_END_ALLOW_THREADS
/* clang-format on */
} else {
val = checksum_fn(input.buf, (int)input.len, val);
val = checksum_fn(input.buf, (size_t)input.len, val);
}
py_result = PyLong_FromUnsignedLong(val);
done:
Expand All @@ -62,10 +55,52 @@ PyObject *checksums_crc_common(PyObject *args, uint32_t (*checksum_fn)(const uin

PyObject *aws_py_checksums_crc32(PyObject *self, PyObject *args) {
(void)self;
return checksums_crc_common(args, aws_checksums_crc32);
return checksums_crc32_common(args, aws_checksums_crc32_ex);
}

PyObject *aws_py_checksums_crc32c(PyObject *self, PyObject *args) {
(void)self;
return checksums_crc_common(args, aws_checksums_crc32c);
return checksums_crc32_common(args, aws_checksums_crc32c_ex);
}

/**
 * Python binding for the CRC64 NVME checksum.
 *
 * `args` is parsed with the "s*O" format as (input, previous_crc64nvme): a
 * buffer holding the data to checksum and a Python integer holding the running
 * CRC to continue from.
 *
 * Returns a new Python integer with the updated CRC. Returns NULL with a Python
 * exception set if the arguments cannot be parsed, if
 * PyLong_AsUnsignedLongLong() fails on the previous CRC, or if the buffer is
 * not C-contiguous.
 */
PyObject *aws_py_checksums_crc64nvme(PyObject *self, PyObject *args) {
    (void)self;

    Py_buffer data;
    PyObject *py_prev_crc;
    if (!PyArg_ParseTuple(args, "s*O", &data, &py_prev_crc)) {
        return NULL;
    }

    PyObject *py_crc = NULL;

    /* PyArg_ParseTuple() performs no overflow checking on unsigned values, so
     * the previous CRC is received as a generic object and converted here. */
    uint64_t crc = PyLong_AsUnsignedLongLong(py_prev_crc);
    if (crc == (uint64_t)-1 && PyErr_Occurred()) {
        goto cleanup;
    }

    if (!PyBuffer_IsContiguous(&data, 'C')) {
        PyErr_SetString(PyExc_ValueError, "input must be contiguous buffer");
        goto cleanup;
    }

    if (data.len <= 1024 * 5) {
        /* Small input: releasing the GIL would be inefficient and may lower
         * performance, so compute while still holding it. */
        crc = aws_checksums_crc64nvme_ex(data.buf, (size_t)data.len, crc);
    } else {
        /* Larger input: release the GIL for the duration of the computation. */
        /* clang-format off */
        Py_BEGIN_ALLOW_THREADS
            crc = aws_checksums_crc64nvme_ex(data.buf, (size_t)data.len, crc);
        Py_END_ALLOW_THREADS
        /* clang-format on */
    }

    py_crc = PyLong_FromUnsignedLongLong(crc);

cleanup:
    /* The buffer was acquired by PyArg_ParseTuple() and must be released on
     * every path that reaches this point. */
    if (data.obj) {
        PyBuffer_Release(&data);
    }
    return py_crc;
}
1 change: 1 addition & 0 deletions source/module.c
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,7 @@ static PyMethodDef s_module_methods[] = {
/* Checksum primitives */
AWS_PY_METHOD_DEF(checksums_crc32, METH_VARARGS),
AWS_PY_METHOD_DEF(checksums_crc32c, METH_VARARGS),
AWS_PY_METHOD_DEF(checksums_crc64nvme, METH_VARARGS),

/* HTTP */
AWS_PY_METHOD_DEF(http_connection_close, METH_VARARGS),
Expand Down
42 changes: 42 additions & 0 deletions test/test_checksums.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,48 @@ def test_crc32c_huge_buffer(self):
val = checksums.crc32c(huge_buffer)
self.assertEqual(0x572a7c8a, val)

def test_crc64nvme_zeros_one_shot(self):
    # CRC64 NVME of 32 zero bytes, computed in a single call.
    zeros = bytes(32)
    self.assertEqual(0xcf3473434d4ecf3b, checksums.crc64nvme(zeros))

def test_crc64nvme_zeros_iterated(self):
    # Feed 32 zero bytes one at a time, threading the running CRC through
    # each call; the expected value is the same as for the one-shot test.
    single_zero = bytes(1)
    running = 0
    for _ in range(32):
        running = checksums.crc64nvme(single_zero, running)
    self.assertEqual(0xcf3473434d4ecf3b, running)

def test_crc64nvme_values_one_shot(self):
    # CRC64 NVME over the characters with code points 0..31, passed as one str.
    text = ''.join(map(chr, range(32)))
    self.assertEqual(0xb9d9d4a8492cbd7f, checksums.crc64nvme(text))

def test_crc64nvme_values_iterated(self):
    # Same data as the one-shot values test, fed one character per call with
    # the running CRC passed back in each time.
    running = 0
    for code_point in range(32):
        running = checksums.crc64nvme(chr(code_point), running)
    self.assertEqual(0xb9d9d4a8492cbd7f, running)

def test_crc64nvme_large_buffer(self):
    # Stress the GIL optimization with a 25 MiB zero buffer, sized for 32-bit
    # architectures that cannot handle the huge-buffer test.
    size = 25 * 2**20
    self.assertEqual(0x5b6f5045463ca45e, checksums.crc64nvme(bytes(size)))

def test_crc64nvme_huge_buffer(self):
    # Checksum a zero-filled buffer whose length exceeds the unsigned 32-bit
    # range. Skipped on FreeBSD and on machines that cannot allocate it.
    if sys.platform.startswith('freebsd'):
        # Skip this test for freebsd, as it simply crashes instead of raising exception in this case
        raise unittest.SkipTest('Skip this test for freebsd')
    # 2**32 - 1 is the largest unsigned 32-bit value.
    UINT32_MAX = 2**32 - 1
    try:
        huge_buffer = bytes(UINT32_MAX + 5)
    except (MemoryError, OverflowError):
        # MemoryError: not enough memory for the allocation.
        # OverflowError: the requested size does not fit the platform's index
        # type (e.g. 32-bit builds).
        # Only these allocation failures become a skip; anything else
        # (KeyboardInterrupt, SystemExit, genuine bugs) must propagate.
        raise unittest.SkipTest('Machine cant allocate giant buffer for giant buffer test')
    val = checksums.crc64nvme(huge_buffer)
    self.assertEqual(0x2645c28052b1fbb0, val)


if __name__ == '__main__':
unittest.main()

0 comments on commit 1a20c4e

Please sign in to comment.