diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f96c8d..2913b41 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,11 +49,40 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(spdlog) +# Handle inclusion of dx2 dependency in a choice of several ways +set(dx2_SOURCE "" CACHE FILEPATH "Location of dx2 source directory") +set(_dx2_already_populated "${dx2_SOURCE_DIR}") +if(dx2_SOURCE) + # Explicitly told where to find dx2 + add_subdirectory(${dx2_SOURCE} dx2) + set(dx2_SOURCE_DIR "${dx2_SOURCE}") +elseif(EXISTS ${CMAKE_SOURCE_DIR}/dx2) + # dx2 exists as a local checkout + add_subdirectory(dx2) + set(dx2_SOURCE_DIR "${CMAKE_SOURCE_DIR}/dx2") +else() + # Try finding an installed dx2, or fetch + FetchContent_Declare( + dx2 + GIT_REPOSITORY https://github.com/dials/dx2 + GIT_TAG 2654929bc73e0d699dfdfcc2d924c3454e434ccf + FIND_PACKAGE_ARGS + ) + FetchContent_MakeAvailable(dx2) +endif() +# Only print a message when as a new action +if (NOT dx2_SOURCE_DIR STREQUAL _dx2_already_populated) + message(STATUS "Found dx2: ${dx2_SOURCE_DIR}") +endif() + # Make a small library that we can link to to get the version project(version CXX) configure_file(version.cc.in version.cc @ONLY) add_library(version STATIC version.cc) +enable_testing() + add_subdirectory(h5read) add_subdirectory(baseline) add_subdirectory(spotfinder) +add_subdirectory(baseline_indexer) diff --git a/baseline_indexer/CMakeLists.txt b/baseline_indexer/CMakeLists.txt new file mode 100644 index 0000000..e6f6656 --- /dev/null +++ b/baseline_indexer/CMakeLists.txt @@ -0,0 +1,48 @@ +project(indexer CXX) + +set(CMAKE_CXX_STANDARD 20) + +find_package (Threads) + +# Automatic Dependencies +set(FETCHCONTENT_QUIET OFF) +include(FetchContent) +FetchContent_Declare( + Eigen3 + GIT_REPOSITORY https://gitlab.com/libeigen/eigen.git + GIT_TAG 3.4.0 + EXCLUDE_FROM_ALL + FIND_PACKAGE_ARGS +) +FetchContent_Declare( + pocketfft + GIT_REPOSITORY https://github.com/mreineck/pocketfft + GIT_TAG cpp +) +FetchContent_MakeAvailable(Eigen3) +FetchContent_MakeAvailable(pocketfft) + +FetchContent_GetProperties(pocketfft) +add_library(pocketfft INTERFACE) +target_include_directories(pocketfft INTERFACE ${pocketfft_SOURCE_DIR}) + +# GTest could have been made available under a different name +if(NOT TARGET GTest::gtest_main) + FetchContent_MakeAvailable(GTest) +endif() + +add_subdirectory(tests) +add_executable(baseline_indexer + indexer.cc +) +target_link_libraries(baseline_indexer + PRIVATE + fmt + dx2 + Eigen3::Eigen + pocketfft + argparse + nlohmann_json::nlohmann_json + spdlog::spdlog + ${CMAKE_THREAD_LIBS_INIT} +) diff --git a/baseline_indexer/fft3d.cc b/baseline_indexer/fft3d.cc new file mode 100644 index 0000000..0303daf --- /dev/null +++ b/baseline_indexer/fft3d.cc @@ -0,0 +1,177 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "common.hpp" + +using Eigen::Matrix3d; +using Eigen::Vector3d; +using Eigen::Vector3i; + +#define _USE_MATH_DEFINES +#include + +using namespace pocketfft; + +/** + * @brief map reciprocal space vectors onto a grid of size n_points^3. + * @param reciprocal_space_vectors Reciprocal space vectors to be mapped. + * @param data_in The vector (grid) which the data will be mapped to. + * @param selection The vector of the selection of points mapped to the grid. + * @param d_min A resolution limit for mapping to the grid. + * @param b_iso The isotropic B-factor used to weight the points as a function of resolution. + * @param n_points The size of each dimension of the FFT grid. + */ +void map_centroids_to_reciprocal_space_grid( + std::vector const &reciprocal_space_vectors, + std::vector> &data_in, + std::vector &selection, + double d_min, + double b_iso = 0, + uint32_t n_points = 256) { + assert(data_in.size() == n_points * n_points * n_points); + // Determine the resolution span of the grid so we know how to map + // each coordinate to the grid. + // -dmin to +dmin spans the coordinate range from 0 to N (in each dimension), + // but the discrete FFT requires input coefficients 0 to N-1, and so our + // grid goes from 0 .. N-1 and we don't use data that maps to grid point N + // (which is equivalent to grid point 0) + + const double rlgrid = 2 / (d_min * n_points); + const double one_over_rlgrid = 1 / rlgrid; + const int half_n_points = n_points / 2; + int count = 0; + + for (int i = 0; i < reciprocal_space_vectors.size(); i++) { + const Vector3d v = reciprocal_space_vectors[i]; + const double v_length = v.norm(); + const double d_spacing = 1 / v_length; + if (d_spacing < d_min) { + selection[i] = false; + continue; + } + Vector3i coord; + // map to the nearest point in each dimension. + for (int j = 0; j < 3; j++) { + coord[j] = static_cast(round(v[j] * one_over_rlgrid)) + half_n_points; + } + if ((coord.maxCoeff() >= n_points) || coord.minCoeff() < 0) { + // We can get maxcoeff == n_points if we anear +dmin, see comment above. + selection[i] = false; + continue; + } + // Use the b_iso to determine the weight for each coordinate. + double T; + if (b_iso != 0) { + T = std::exp(-b_iso * v_length * v_length / 4.0); + } else { + T = 1; + } + // unravel to the 1d index and write the complex value. + size_t index = + coord[2] + (n_points * coord[1]) + (n_points * n_points * coord[0]); + if (!data_in[index].real()) { + count++; + } + data_in[index] = {T, 0.0}; + } + logger->info("Number of centroids used: {}", count); +} + +/** + * @brief Perform a 3D FFT of the reciprocal space coordinates (spots). + * @param reciprocal_space_vectors The input vector of reciprocal space coordinates. + * @param real_out The (real) array that the FFT result will be written to. + * @param d_min Cut the data at this resolution limit for the FFT + * @param b_iso The isotropic B-factor used to weight the points as a function of resolution. + * @param n_points The size of each dimension of the FFT grid. + * @returns A boolean array indicating which coordinates were used for the FFT. + */ +std::vector fft3d(std::vector const &reciprocal_space_vectors, + std::vector &real_out, + double d_min, + double b_iso = 0, + uint32_t n_points = 256, + size_t nthreads = 1) { + auto start = std::chrono::system_clock::now(); + assert(real_out.size() == n_points * n_points * n_points); + + // We want to write out the real part of the FFT, but the pocketfft functions require + // complex vectors (we are using c2c i.e. complex to complex), so initialise these vectors. + // Note we should be able to use c2r rather than c2c, but I couldn't get this to work with + // the output ordering in c2r (JBE). + std::vector> complex_data_in(n_points * n_points * n_points); + + // A boolean array of whether the vectors were used for the FFT. + std::vector used_in_indexing(reciprocal_space_vectors.size(), true); + auto t1 = std::chrono::system_clock::now(); + + // Map the vectors onto a discrete grid. The values of the grid points are weights + // determined by b_iso. + map_centroids_to_reciprocal_space_grid(reciprocal_space_vectors, + complex_data_in, + used_in_indexing, + d_min, + b_iso, + n_points); + auto t2 = std::chrono::system_clock::now(); + + // Prepare the required objects for the FFT. + shape_t shape_in{n_points, n_points, n_points}; + int stride_x = sizeof(std::complex); + int stride_y = static_cast(sizeof(std::complex) * n_points); + int stride_z = static_cast(sizeof(std::complex) * n_points * n_points); + stride_t stride_in{ + stride_x, stride_y, stride_z}; // must have the size of each element. Must have + // size() equal to shape_in.size() + stride_t stride_out{ + stride_x, stride_y, stride_z}; // must have the size of each element. Must + // have size() equal to shape_in.size() + shape_t axes{0, 1, 2}; // 0 to shape.size()-1 inclusive + bool forward{FORWARD}; + + double fct{1.0f}; + // note, threads can be higher than the number of hardware threads. + // It is not clear what the best value is for this. + logger->info("Performing FFT with nthreads={}", nthreads); + // Do the FFT. + c2c( + shape_in, + stride_in, + stride_out, + axes, + forward, + complex_data_in.data(), + complex_data_in + .data(), // this is the output array, we are going to overwrite the input as we don't need it + fct, + nthreads); + auto t3 = std::chrono::system_clock::now(); + + // Take the square of the real part as the output. + for (int i = 0; i < real_out.size(); ++i) { + real_out[i] = std::pow(complex_data_in[i].real(), 2); + } + auto t4 = std::chrono::system_clock::now(); + + std::chrono::duration elapsed_seconds = t4 - start; + std::chrono::duration elapsed_map = t2 - t1; + std::chrono::duration elapsed_make_arrays = t1 - start; + std::chrono::duration elapsed_c2c = t3 - t2; + std::chrono::duration elapsed_square = t4 - t3; + logger->debug("Total time for fft3d: {:.5f}s", elapsed_seconds.count()); + logger->debug("elapsed time for making data arrays: {:.5f}s", + elapsed_make_arrays.count()); + logger->debug("elapsed time for map_to_recip: {:.5f}s", elapsed_map.count()); + logger->debug("elapsed time for c2c: {:.5f}s", elapsed_c2c.count()); + logger->debug("elapsed time for squaring: {:.5f}s", elapsed_square.count()); + + return used_in_indexing; +} diff --git a/baseline_indexer/flood_fill.cc b/baseline_indexer/flood_fill.cc new file mode 100644 index 0000000..ba1a7d4 --- /dev/null +++ b/baseline_indexer/flood_fill.cc @@ -0,0 +1,192 @@ +#include + +#include +#include +#include +#include +#include +#include +#define _USE_MATH_DEFINES +#include +#include +#include + +#include "common.hpp" + +using Eigen::Vector3d; +using Eigen::Vector3i; + +// Define a modulo function that returns python style modulo for negative numbers. +int modulo(int i, int n) { + return (i % n + n) % n; +} + +/** + * @brief Perform a flood fill algorithm on a grid of data to determine connected areas of signal. + * @param grid The input array (grid) of data + * @param rmsd_cutoff Filter out grid points below this cutoff value + * @param n_points The size of each dimension of the FFT grid. + * @returns A tuple of grid points per peak and centres of mass of the peaks in fractional coordinates. + */ +std::tuple, std::vector> flood_fill( + std::vector const& grid, + double rmsd_cutoff = 15.0, + int n_points = 256) { + auto start = std::chrono::system_clock::now(); + assert(grid.size() == n_points * n_points * n_points); + // First calculate the rmsd and use this to create a binary grid + double sumg = std::accumulate(grid.begin(), grid.end(), 0.0); + double meang = sumg / grid.size(); + double sum_delta_sq = std::accumulate( + grid.begin(), grid.end(), 0.0, [meang](double total, const double& val) { + return total + std::pow(val - meang, 2); + }); + double rmsd = std::pow(sum_delta_sq / grid.size(), 0.5); + + // Most of the binary grid will be zero, so use an + // unordered map rather than vector. + std::unordered_map grid_binary; + double cutoff = rmsd_cutoff * rmsd; + for (int i = 0; i < grid.size(); i++) { + if (grid[i] >= cutoff) { + grid_binary[i] = 1; + } + } + auto t2 = std::chrono::system_clock::now(); + std::chrono::duration elapsed_time = t2 - start; + logger->debug("Time for first part of flood fill: {:.5f}s", elapsed_time.count()); + + // Now do the flood fill. + // Wrap around the edge in all three dimensions to replicate the DIALS + // results exactly. + int n_voids = 0; + std::stack stack; + std::vector> accumulators; + int target = 1; + int replacement = 2; + std::vector grid_points_per_void; + int accumulator_index = 0; + + // precalculate a few constants + int total = n_points * n_points * n_points; + int n_sq = n_points * n_points; + int n_sq_minus_n = n_points * (n_points - 1); + int nn_sq_minus_n = n_points * n_points * (n_points - 1); + for (auto& it : grid_binary) { + if (it.second == target) { + // Convert the array index into xyz coordinates. + // Store xyz coordinates on the stack, but index the array with 1D index. + int i = it.first; + int x = i % n_points; + int y = (i % n_sq) / n_points; + int z = i / n_sq; + Vector3i xyz = {x, y, z}; + stack.push(xyz); + grid_binary[i] = replacement; + std::vector this_accumulator; + accumulators.push_back(this_accumulator); + n_voids++; + grid_points_per_void.push_back(0); + + while (!stack.empty()) { + Vector3i this_xyz = stack.top(); + stack.pop(); + accumulators[accumulator_index].push_back(this_xyz); + grid_points_per_void[accumulator_index]++; + + // Predefined neighbor offsets for 6-connected neighbors + static const std::array neighbors = {Vector3i{1, 0, 0}, + Vector3i{-1, 0, 0}, + Vector3i{0, 1, 0}, + Vector3i{0, -1, 0}, + Vector3i{0, 0, 1}, + Vector3i{0, 0, -1}}; + + int modx = modulo(this_xyz[0], n_points); + int mody = modulo(this_xyz[1], n_points) * n_points; + int modz = modulo(this_xyz[2], n_points) * n_sq; + + for (const Vector3i& offset : neighbors) { + // Compute the neighbor position + Vector3i neighbor = this_xyz + offset; + // Compute the flattened 1D array index for the neighbor + int array_index = + (offset[0] ? modulo(neighbor[0], n_points) : modx) + // x + (offset[1] ? (modulo(neighbor[1], n_points) * n_points) : mody) + + // y + (offset[2] ? (modulo(neighbor[2], n_points) * n_sq) : modz); // z + + // Check if the neighbor matches the target and push to the stack + std::unordered_map::const_iterator found = + grid_binary.find(array_index); + if (found != grid_binary.end()) { + if (found->second == target) { + grid_binary[array_index] = replacement; + stack.push(neighbor); + } + } + } + } + replacement++; + accumulator_index++; + } + } + auto t3 = std::chrono::system_clock::now(); + std::chrono::duration elapsed_time2 = t3 - t2; + logger->debug("Time for second part of flood fill: {:.5f}s", elapsed_time2.count()); + + // Now calculate the unweighted centres of mass of each group, in fractional coordinates. + std::vector centres_of_mass_frac(n_voids); + for (int i = 0; i < accumulators.size(); i++) { + std::vector values = accumulators[i]; + int n = values.size(); + double divisor = static_cast(n * n_points); + Vector3i sum = std::accumulate(values.begin(), values.end(), Vector3i{0, 0, 0}); + centres_of_mass_frac[i] = { + sum[2] / divisor, sum[1] / divisor, sum[0] / divisor}; //z,y,x + } + return std::make_tuple(grid_points_per_void, centres_of_mass_frac); +} + +/** + * @brief Perform a filter on the flood fill results. + * @param grid_points_per_void The number of grid points in each peak + * @param centres_of_mass_frac The centres of mass of each peak, in fractional coordinates + * @param peak_volume_cutoff The minimum fractional threshold for peaks to be included. + * @returns A tuple of grid points per peak and centres of mass of the peaks in fractional coordinates. + */ +std::tuple, std::vector> flood_fill_filter( + std::vector grid_points_per_void, + std::vector centres_of_mass_frac, + double peak_volume_cutoff = 0.15) { + // Filter out based on iqr range and peak_volume_cutoff + std::vector grid_points_per_void_unsorted(grid_points_per_void); + // Acting on a copy of the input data. + std::sort(grid_points_per_void.begin(), grid_points_per_void.end()); + // The peak around the origin of the FFT can be very large in volume, + // so use the IQR range to filter high-volume peaks out that are not + // from the typical distribution of points, before applying the peak + // volume cutoff based on a fraction of the max volume in the remaining + // array. But the large volume peaks are still contained in the output. + int Q3_index = grid_points_per_void.size() * 3 / 4; + int Q1_index = grid_points_per_void.size() / 4; + int iqr = grid_points_per_void[Q3_index] - grid_points_per_void[Q1_index]; + constexpr int iqr_multiplier = 5; + int cut = (iqr * iqr_multiplier) + grid_points_per_void[Q3_index]; + + // Remove abnormally high volumes + while (grid_points_per_void[grid_points_per_void.size() - 1] > cut) { + grid_points_per_void.pop_back(); + } + int max_val = grid_points_per_void[grid_points_per_void.size() - 1]; + // Cut based on a fraction of the max volume. + int peak_cutoff = static_cast(peak_volume_cutoff * max_val); + for (int i = grid_points_per_void_unsorted.size() - 1; i >= 0; i--) { + if (grid_points_per_void_unsorted[i] <= peak_cutoff) { + grid_points_per_void_unsorted.erase(grid_points_per_void_unsorted.begin() + + i); + centres_of_mass_frac.erase(centres_of_mass_frac.begin() + i); + } + } + return std::make_tuple(grid_points_per_void_unsorted, centres_of_mass_frac); +} diff --git a/baseline_indexer/indexer.cc b/baseline_indexer/indexer.cc new file mode 100644 index 0000000..f238114 --- /dev/null +++ b/baseline_indexer/indexer.cc @@ -0,0 +1,226 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common.hpp" +#include "fft3d.cc" +#include "flood_fill.cc" +#include "gemmi/symmetry.hpp" +#include "peaks_to_rlvs.cc" +#include "xyz_to_rlp.cc" + +using Eigen::Matrix3d; +using Eigen::Vector3d; +using Eigen::Vector3i; +using json = nlohmann::json; + +int main(int argc, char** argv) { + // The purpose of an indexer is to determine the lattice model that best + // explains the positions of the strong spots found during spot-finding. + // The lattice model is a set of three vectors that define the crystal + // lattice translations. + // The experiment models (beam, detector) can also be refined during the + // indexing process. The output is a set of models - a new crystal model that + // describes the crystal lattice and an updated set of experiment models. + auto t1 = std::chrono::system_clock::now(); + auto parser = argparse::ArgumentParser(); + parser.add_argument("-e", "--expt").help("Path to the DIALS expt file"); + parser.add_argument("-r", "--refl") + .help("Path to the h5 reflection table file containing spotfinding results"); + parser.add_argument("--dmin") + .help("The resolution limit of spots to use in the indexing process.") + .scan<'f', float>(); + parser.add_argument("--max-cell") + .help("The maximum possible cell length to consider during indexing") + .scan<'f', float>(); + parser + .add_argument( + "--fft-npoints") // mainly for testing, likely would always want to keep it as 256. + .help( + "The number of grid points to use for the fft. Powers of two are most " + "efficient.") + .default_value(256) + .scan<'u', uint32_t>(); + parser + .add_argument("--nthreads") // mainly for testing. + .help( + "The number of threads to use for the fft calculation." + "Defaults to the value of std::thread::hardware_concurrency." + "Better performance can typically be obtained with a higher number" + "of threads than this.") + .scan<'u', size_t>(); + parser.parse_args(argc, argv); + + if (!parser.is_used("expt")) { + logger->error("Must specify experiment list file with --expt\n"); + std::exit(1); + } + if (!parser.is_used("refl")) { + logger->error( + "Must specify spotfinding results file (in DIALS HDF5 format) with --refl\n"); + std::exit(1); + } + // In DIALS, the max cell is automatically determined through a nearest + // neighbour analysis that requires the annlib package. For now, + // let's make this a required argument to help with testing/comparison + // to DIALS. + if (!parser.is_used("max-cell")) { + logger->error("Must specify --max-cell\n"); + std::exit(1); + } + // FIXME use highest resolution by default to remove this requirement. + if (!parser.is_used("dmin")) { + logger->error("Must specify --dmin\n"); + std::exit(1); + } + std::string imported_expt = parser.get("expt"); + std::string filename = parser.get("refl"); + double max_cell = parser.get("max-cell"); + double d_min = parser.get("dmin"); + + // Parse the experiment list (a json file) and load the models. + // Will be moved to dx2. + std::ifstream f(imported_expt); + json elist_json_obj; + try { + elist_json_obj = json::parse(f); + } catch (json::parse_error& ex) { + logger->error("Unable to read {}; json parse error at byte {}", + imported_expt.c_str(), + ex.byte); + std::exit(1); + } + + Experiment expt(elist_json_obj); + Scan scan = expt.scan(); + MonochromaticBeam beam = expt.beam(); + Goniometer gonio = expt.goniometer(); + Detector detector = expt.detector(); + assert(detector.panels().size() + == 1); // only considering single panel detectors initially. + Panel panel = detector.panels()[0]; + + // Read data from a reflection table. Again, this should be moved to + // dx2 and only require the data array name (xyzobs.px.value) with some + // logic to step through the directory structure + std::string array_name = "/dials/processing/group_0/xyzobs.px.value"; + // Note, xyzobs_px is the flattened, on-disk representation of the array + // i.e. if there are 100 spots, the length of xyzobs_px is 300, and + // contains the elements [x0, y0, z0, x1, y1, z1, ..., x99, y99, z99] + std::vector xyzobs_px = + read_array_from_h5_file(filename, array_name); + + // The diffraction spots form a lattice in reciprocal space (if the experimental + // geometry is accurate). So use the experimental models to transform the spot + // coordinates on the detector into reciprocal space. + std::vector rlp = xyz_to_rlp(xyzobs_px, panel, beam, scan, gonio); + logger->info("Number of reflections: {}", rlp.size()); + + // b_iso is an isotropic b-factor used to weight the points when doing the fft. + // i.e. high resolution (weaker) spots are downweighted by the expected + // intensity fall-off as as function of resolution. + double b_iso = -4.0 * std::pow(d_min, 2) * log(0.05); + uint32_t n_points = parser.get("fft-npoints"); + logger->info("Setting b_iso = {:.3f}", b_iso); + + // Create an array to store the fft result. This is a 3D grid of points, typically 256^3. + std::vector real_fft_result(n_points * n_points * n_points); + + // Do the fft of the reciprocal lattice coordinates. + // the used in indexing array denotes whether a coordinate was used for the + // fft (might not be if dmin filter was used for example). The used_in_indexing array + // is sometimes used onwards in the dials indexing algorithms, so keep for now. + size_t nthreads; + if (parser.is_used("nthreads")) { + nthreads = parser.get("nthreads"); + } else { + size_t max_threads = std::thread::hardware_concurrency(); + nthreads = max_threads ? max_threads : 1; + } + + std::vector used_in_indexing = + fft3d(rlp, real_fft_result, d_min, b_iso, n_points, nthreads); + + // The fft result is noisy. We want to extract the peaks, which may be spread over several + // points on the fft grid. So we use a flood fill algorithm (https://en.wikipedia.org/wiki/Flood_fill) + // to determine the connected regions in 3D. This is how it is done in DIALS, but I note that + // perhaps this could be done with connected components analysis. + // So do the flood fill, and extract the centres of mass of the peaks and the number of grid points + // that contribute to each peak. + std::vector grid_points_per_peak; + std::vector fractional_centres_of_mass; + // 15.0 is the DIALS 'rmsd_cutoff' parameter to filter out weak peaks. + std::tie(grid_points_per_peak, fractional_centres_of_mass) = + flood_fill(real_fft_result, 15.0, n_points); + // Do some further filtering, 0.15 is the DIALS peak_volume_cutoff parameter. + std::tie(grid_points_per_peak, fractional_centres_of_mass) = + flood_fill_filter(grid_points_per_peak, fractional_centres_of_mass, 0.15); + + // Convert the peak centres from the fft grid into vectors in reciprocal space. These are our candidate + // lattice vectors. + // 3.0 is the min cell parameter. + std::vector candidate_lattice_vectors = peaks_to_rlvs( + fractional_centres_of_mass, grid_points_per_peak, d_min, 3.0, max_cell, n_points); + + // at this point, we will test combinations of the candidate vectors, use those to index the spots, do some + // refinement of the candidates and choose the best one. Then we will do some more refinement including extra + // model parameters. At then end, we will have a list of refined experiment models (including a crystal) + + // For now, let's just write out the candidate vectors and write out the unrefined experiment models with the + // first combination of candidate vectors as an example crystal, to demonstrate an example experiment list data + // structure. + + // dump the candidate vectors to json + std::string n_vecs = std::to_string(candidate_lattice_vectors.size() - 1); + size_t n_zero = n_vecs.length(); + json vecs_out; + for (int i = 0; i < candidate_lattice_vectors.size(); i++) { + std::string s = std::to_string(i); + auto pad_s = std::string(n_zero - std::min(n_zero, s.length()), '0') + s; + vecs_out[pad_s] = candidate_lattice_vectors[i]; + } + std::string outfile = "candidate_vectors.json"; + std::ofstream vecs_file(outfile); + vecs_file << vecs_out.dump(4); + logger->info("Saved candidate vectors to {}", outfile); + + // Now make a crystal and save an experiment list with the models. + if (candidate_lattice_vectors.size() < 3) { + logger->info( + "Insufficient number of candidate vectors to make a crystal model."); + } else { + gemmi::SpaceGroup space_group = *gemmi::find_spacegroup_by_name("P1"); + Crystal best_xtal{candidate_lattice_vectors[0], + candidate_lattice_vectors[1], + candidate_lattice_vectors[2], + space_group}; + expt.set_crystal(best_xtal); + json elist_out = expt.to_json(); + std::string efile_name = "elist.json"; + std::ofstream efile(efile_name); + efile << elist_out.dump(4); + logger->info("Saved experiment list to {}", efile_name); + } + + auto t2 = std::chrono::system_clock::now(); + std::chrono::duration elapsed_time = t2 - t1; + logger->info("Total time for indexer: {:.4f}s", elapsed_time.count()); +} diff --git a/baseline_indexer/peaks_to_rlvs.cc b/baseline_indexer/peaks_to_rlvs.cc new file mode 100644 index 0000000..5e5b0e6 --- /dev/null +++ b/baseline_indexer/peaks_to_rlvs.cc @@ -0,0 +1,186 @@ +#include +#include + +#include +#include +#include +#include +#include + +#include "common.hpp" + +using Eigen::Vector3d; + +#define _USE_MATH_DEFINES +#include + +// Define a few simple data structures to help with the calculations below. +class VectorGroup { + public: + void add(Vector3d vec, int weight) { + vectors.push_back(vec); + weights.push_back(weight); + } + Vector3d mean() const { + Vector3d sum = + std::accumulate(vectors.begin(), vectors.end(), Vector3d{0, 0, 0}); + return sum / static_cast(vectors.size()); + } + std::vector vectors{}; + std::vector weights{}; +}; + +struct SiteData { + Vector3d site; + double length; + int volume; +}; +bool compare_site_data(const SiteData& a, const SiteData& b) { + return a.length < b.length; +} +bool compare_site_data_volume(const SiteData& a, const SiteData& b) { + return a.volume > b.volume; +} + +bool is_approximate_integer_multiple(Vector3d v1, + Vector3d v2, + double relative_length_tolerance = 0.2, + double angular_tolerance = 5.0) { + double angle = angle_between_vectors_degrees(v1, v2); + if ((angle < angular_tolerance) || (std::abs(180 - angle) < angular_tolerance)) { + double l1 = v1.norm(); + double l2 = v2.norm(); + if (l1 > l2) { + std::swap(l1, l2); + } + double n = l2 / l1; + if (std::abs(std::round(n) - n) < relative_length_tolerance) { + return true; + } + } + return false; +} + +/** + * @brief Rescale the fractional coordinates on the grid to vectors (in reciprocal space). + * @param centres_of_mass_frac The fractional centres of mass of FFT peaks. + * @param grid_points_per_void The number of FFT grid points corresponding to each peak. + * @param d_min The resolution limit that was applied to the FFT. + * @param min_cell Don't consider vectors below this length + * @param max_cell Don't consider vectors above this length + * @param n_points The size of each dimension of the FFT grid. + * @returns Unique vectors, sorted by volume, that give describe the FFT peaks. + */ +std::vector peaks_to_rlvs(std::vector centres_of_mass_frac, + std::vector grid_points_per_void, + double d_min, + double min_cell = 3.0, + double max_cell = 92.3, + uint32_t n_points = 256) { + auto start = std::chrono::system_clock::now(); + // Calculate the scaling between the FFT grid and reciprocal space. + double fft_cell_length = n_points * d_min / 2.0; + // Use 'sites_mod_short' and convert to cartesian (but keep in same array) + for (Vector3d& vec : centres_of_mass_frac) { + // Apply the scaling across the vector + std::transform( + vec.begin(), vec.end(), vec.begin(), [fft_cell_length](double& val) { + if (val > 0.5) val -= 1.0; + return val * fft_cell_length; + }); + } + + // now do some filtering based on the min and max cell + std::vector filtered_data; + for (int i = 0; i < centres_of_mass_frac.size(); ++i) { + double length = centres_of_mass_frac[i].norm(); + if (length > min_cell && length < 2 * max_cell) { + filtered_data.push_back( + {centres_of_mass_frac[i], length, grid_points_per_void[i]}); + } + } + + // Now sort the filtered data. Ggroup together those + // with similar angles and lengths (e.g. inverse pairs from the FFT). + double relative_length_tolerance = 0.1; + double angular_tolerance = 5.0; + std::vector vector_groups{}; + for (const SiteData& data : filtered_data) { + bool matched_group = false; + for (VectorGroup& group : vector_groups) { + Vector3d mean_v = group.mean(); + double mean_v_length = mean_v.norm(); + if ((std::abs(mean_v_length - data.length) + / std::max(mean_v_length, data.length)) + < relative_length_tolerance) { + double angle = angle_between_vectors_degrees(mean_v, data.site); + if (angle < angular_tolerance) { + group.add(data.site, data.volume); + matched_group = true; + break; + } else if (std::abs(180 - angle) < angular_tolerance) { + group.add(-1.0 * data.site, data.volume); + matched_group = true; + break; + } + } + } + // If it didn't match any existing group, create a new one. + if (!matched_group) { + VectorGroup group; + group.add(data.site, data.volume); + vector_groups.push_back(group); + } + } + + // Create "site"s based on the data from the groups. + std::vector grouped_data; + for (const VectorGroup& group : vector_groups) { + Vector3d site = group.mean(); + int max = *std::max_element(group.weights.begin(), group.weights.end()); + grouped_data.push_back({site, site.norm(), max}); + } + + // Sort by volume, then by length. + std::stable_sort( + grouped_data.begin(), grouped_data.end(), compare_site_data_volume); + std::stable_sort(grouped_data.begin(), grouped_data.end(), compare_site_data); + + // Now check if any sites are integer multiples of other sites. + std::vector unique_sites; + for (const SiteData& data : grouped_data) { + bool is_unique = true; + const Vector3d& v = data.site; + for (const SiteData& unique_site : unique_sites) { + // If the volume of the unique site is less than the current site, skip + if (unique_site.volume <= data.volume) { + continue; + } + // If the current site is an integer multiple of the unique site, exit + if (is_approximate_integer_multiple(unique_site.site, v)) { + logger->info("rejecting {:.5f} : is integer multiple of {:.5f}", + v.norm(), + unique_site.site.norm()); + is_unique = false; + break; + } + } + if (is_unique) { + unique_sites.push_back({v, v.norm(), data.volume}); + } + } + // now sort by peak volume again + std::stable_sort( + unique_sites.begin(), unique_sites.end(), compare_site_data_volume); + std::vector unique_vectors_sorted; + logger->info("Candidate basis vectors:"); + for (int i = 0; i < unique_sites.size(); i++) { + unique_vectors_sorted.push_back(unique_sites[i].site); + logger->info("{} {:.5f} {}", i, unique_sites[i].length, unique_sites[i].volume); + } + + std::chrono::duration elapsed_seconds = + std::chrono::system_clock::now() - start; + logger->debug("elapsed time for peaks_to_rlvs: {:.5f}s", elapsed_seconds.count()); + return unique_vectors_sorted; +} diff --git a/baseline_indexer/tests/CMakeLists.txt b/baseline_indexer/tests/CMakeLists.txt new file mode 100644 index 0000000..c305d24 --- /dev/null +++ b/baseline_indexer/tests/CMakeLists.txt @@ -0,0 +1,22 @@ +include(GoogleTest) + +add_executable(test_xyz_to_rlp test_xyz_to_rlp.cc) +target_include_directories(test_xyz_to_rlp PRIVATE "${PROJECT_SOURCE_DIR}") +target_link_libraries(test_xyz_to_rlp GTest::gtest_main dx2 Eigen3::Eigen nlohmann_json::nlohmann_json) + +add_executable(test_fft3d test_fft3d.cc) +target_include_directories(test_fft3d PRIVATE "${PROJECT_SOURCE_DIR}") +target_link_libraries(test_fft3d GTest::gtest_main fmt Eigen3::Eigen pocketfft spdlog::spdlog) + +add_executable(test_flood_fill test_flood_fill.cc) +target_include_directories(test_flood_fill PRIVATE "${PROJECT_SOURCE_DIR}") +target_link_libraries(test_flood_fill GTest::gtest_main fmt Eigen3::Eigen spdlog::spdlog) + +add_executable(test_peaks_to_rlvs test_peaks_to_rlvs.cc) +target_include_directories(test_peaks_to_rlvs PRIVATE "${PROJECT_SOURCE_DIR}") +target_link_libraries(test_peaks_to_rlvs GTest::gtest_main fmt dx2 Eigen3::Eigen spdlog::spdlog) + +gtest_discover_tests(test_xyz_to_rlp PROPERTIES LABELS baseline-indexer-tests) +gtest_discover_tests(test_flood_fill PROPERTIES LABELS baseline-indexer-tests) +gtest_discover_tests(test_fft3d PROPERTIES LABELS baseline-indexer-tests) +gtest_discover_tests(test_peaks_to_rlvs PROPERTIES LABELS baseline-indexer-tests) diff --git a/baseline_indexer/tests/test_baseline_indexer.sh b/baseline_indexer/tests/test_baseline_indexer.sh new file mode 100755 index 0000000..5f4bea6 --- /dev/null +++ b/baseline_indexer/tests/test_baseline_indexer.sh @@ -0,0 +1,95 @@ +#!/bin/bash + +# Run this file from within the build folder i.e. +# ../baseline_indexer/test_baseline_indexer.sh + +# To run the indexer, we need: +# a strong reflection table (in DIALS new H5 format) +# an experiment list (standard dials format) +# a max cell (i.e. upper limit on basis vector length) +# a dmin (resolution limit of spots to use in fourier transform) +if test -f "candidate_vectors.json"; then + rm candidate_vectors.json +fi + +./bin/baseline_indexer \ + -r /dls/i03/data/2024/cm37235-2/processing/JBE/ins_14_24_rot/strong.refl \ + -e /dls/i03/data/2024/cm37235-2/processing/JBE/ins_14_24_rot/imported.expt \ + --max-cell 100 \ + --dmin 1.81 + +# Read the output from the candidate vecs, which is all we have for now. +output=$(cat candidate_vectors.json) + +expected_output='{ + "00": [ + -0.38288460328028456, + -16.046345646564774, + 2.9586537526204055 + ], + "01": [ + -41.00043348644091, + 6.374347624571426, + 53.04086788840915 + ], + "02": [ + -25.233528614044186, + -61.114115715026855, + 14.959117174148561 + ], + "03": [ + -15.894061997532845, + 63.236873000860214, + 38.00999879837036 + ], + "04": [ + -0.45249998569488525, + -13.574999570846558, + 8.144999742507935 + ], + "05": [ + -18.778749406337738, + -87.10624724626541, + -90.95249712467194 + ], + "06": [ + -75.11499762535095, + -18.09999942779541, + 0.0 + ], + "07": [ + -8.144999742507935, + -90.04749715328217, + -25.339999198913574 + ], + "08": [ + -59.72999811172485, + -82.35499739646912, + -37.33124881982803 + ], + "09": [ + 0.0, + 1.809999942779541, + -9.954999685287476 + ], + "10": [ + 1.809999942779541, + 26.244999170303345, + 36.19999885559082 + ] +}' + +# Compare the output with the expected output +if [ "$output" == "$expected_output" ]; then + echo "#############################################" + echo "# #" + echo "# SUCCESS!!! #" + echo "# #" + echo "#############################################" +else + echo "*********************************************" + echo "* *" + echo "* FAILURE!!! *" + echo "* *" + echo "*********************************************" +fi \ No newline at end of file diff --git a/baseline_indexer/tests/test_fft3d.cc b/baseline_indexer/tests/test_fft3d.cc new file mode 100644 index 0000000..36d07d1 --- /dev/null +++ b/baseline_indexer/tests/test_fft3d.cc @@ -0,0 +1,74 @@ +#include +#include + +#include +#include + +#include "common.hpp" +#include "fft3d.cc" +using Eigen::Matrix3d; +using Eigen::Vector3d; + +TEST(BaselineIndexer, map_centroids_to_reciprocal_space_test) { + std::vector reciprocal_space_vectors{}; + reciprocal_space_vectors.push_back({-0.2, 0.2, 0.25}); + reciprocal_space_vectors.push_back({-0.2, 0.1, 0.10}); + uint32_t n_points = 64; + std::vector> complex_data_in(n_points * n_points * n_points); + std::vector used_in_indexing(reciprocal_space_vectors.size(), true); + double d_min = 2.0; + // First test with no biso; + double b_iso = 0.0; + map_centroids_to_reciprocal_space_grid(reciprocal_space_vectors, + complex_data_in, + used_in_indexing, + d_min, + b_iso, + n_points); + // expect these map to data at indices 80294 and 80752 + EXPECT_DOUBLE_EQ(complex_data_in[80294].real(), 1.0); + EXPECT_DOUBLE_EQ(complex_data_in[80752].real(), 1.0); + // check no other values have been written + EXPECT_DOUBLE_EQ( + std::accumulate( + complex_data_in.begin(), complex_data_in.end(), std::complex{0.0, 0.0}) + .real(), + 2.0); + + // Now set a biso; + double b_iso_2 = 10.0; + std::vector> complex_data_in_2(n_points * n_points * n_points); + map_centroids_to_reciprocal_space_grid(reciprocal_space_vectors, + complex_data_in_2, + used_in_indexing, + d_min, + b_iso_2, + n_points); + EXPECT_DOUBLE_EQ(complex_data_in_2[80294].real(), 0.86070797642505781); + EXPECT_DOUBLE_EQ(complex_data_in_2[80752].real(), 0.70029752396813894); + //check no other values have been written + EXPECT_DOUBLE_EQ( + std::accumulate( + complex_data_in_2.begin(), complex_data_in_2.end(), std::complex{0.0, 0.0}) + .real(), + 1.5610055003931969); + + // Now set a d_min, which filters out one of the points; + double dmin_2 = 4.0; + std::vector> complex_data_in_3(n_points * n_points * n_points); + map_centroids_to_reciprocal_space_grid(reciprocal_space_vectors, + complex_data_in_3, + used_in_indexing, + dmin_2, + b_iso_2, + n_points); + // The index changes as reciprocal space is rescaled to cover the resolution range. + // now expect a single value at index 27501 + EXPECT_DOUBLE_EQ(complex_data_in_3[27501].real(), 0.86070797642505781); + // check no other values have been written + EXPECT_DOUBLE_EQ( + std::accumulate( + complex_data_in_3.begin(), complex_data_in_3.end(), std::complex{0.0, 0.0}) + .real(), + 0.86070797642505781); +} \ No newline at end of file diff --git a/baseline_indexer/tests/test_flood_fill.cc b/baseline_indexer/tests/test_flood_fill.cc new file mode 100644 index 0000000..04c10d4 --- /dev/null +++ b/baseline_indexer/tests/test_flood_fill.cc @@ -0,0 +1,83 @@ +#include +#include + +#include +#include + +#include "common.hpp" +#include "flood_fill.cc" +using Eigen::Matrix3d; +using Eigen::Vector3d; + +TEST(BaselineIndexer, flood_fill_test) { + // Modify the test values (for channel) from cctbx masks flood_fill + int n_points = 5; + std::vector grid(n_points * n_points * n_points, 0.0); + std::vector corner_cube{ + {0, 4, 20, 24, 100, 104, 120, 124}}; // cube across all 8 corners + // i.e. at fractional coords (0,0,0), (0,0.8,0), (0,0,0.8), ... (0.8,0.8,0.8) + std::vector channel{ + {12, 37, 38, 39, 42, 43, 62, 63, 67, 112}}; // a channel with a break + // channel: fractional coords along z: 1 at 0, 5 at 0.2, 3 at 0.4, 1 at 0.8 (==-0.2) + for (auto& i : corner_cube) { + grid[i] = 100; + } + for (auto& i : channel) { + grid[i] = 100; + } + // now add a weak point (next to the corner), which should get filtered out by the rmsd cutoff filter. + grid[1] = 1; // the RMSD is approx 35, so anything below this is filtered out. + std::vector grid_points_per_void; + std::vector centres_of_mass_frac; + std::tie(grid_points_per_void, centres_of_mass_frac) = + flood_fill(grid, 1.0, n_points); + + EXPECT_EQ(grid_points_per_void[0], 10); // channel + EXPECT_EQ(grid_points_per_void[1], 8); // corner cube + // we return z,y,x. + EXPECT_DOUBLE_EQ(centres_of_mass_frac[0][0], 1.2); // z (== 0.2) + EXPECT_DOUBLE_EQ(centres_of_mass_frac[0][1], 0.46); // y + EXPECT_DOUBLE_EQ(centres_of_mass_frac[0][2], 0.5); // x + // points over the boundary are equivalent modulo 1.0 (centre of space is at 0.5,0.5,0.5) + EXPECT_DOUBLE_EQ(centres_of_mass_frac[1][0], 0.9); // corner cube + EXPECT_DOUBLE_EQ(centres_of_mass_frac[1][1], -0.1); // corner cube (==0.9) + EXPECT_DOUBLE_EQ(centres_of_mass_frac[1][2], 0.9); // corner cube +} + +TEST(BaselineIndexer, flood_fill_filter_test) { + std::vector grid_points_per_void{{1, 3, 1, 2, 80, 5, 3, 4, 2}}; + std::vector centres_of_mass_frac; + for (int i = 0; i < grid_points_per_void.size(); i++) { + double frac = static_cast(i + 1) / 10.0; + centres_of_mass_frac.push_back({frac, frac, frac}); + } + std::vector grid_points_per_void_out; + std::vector centres_of_mass_frac_out; + // The value 80 should be internally filtered out based on IQR + // but solely for the purposes of the peak_volume_cutoff check. + // i.e. Then max value is 5, so with a peak_volume_cutoff of 0.2, + // only the ones should be removed. + std::tie(grid_points_per_void_out, centres_of_mass_frac_out) = + flood_fill_filter(grid_points_per_void, centres_of_mass_frac, 0.2); + + EXPECT_EQ(grid_points_per_void_out.size(), 7); + EXPECT_EQ(centres_of_mass_frac_out.size(), 7); + EXPECT_EQ(grid_points_per_void.size(), 9); // it was unmodified. + EXPECT_EQ(centres_of_mass_frac.size(), 9); // it was unmodified. + std::vector expected_grid_points_per_void{{3, 2, 80, 5, 3, 4, 2}}; + std::vector expected_centres_of_mass; + expected_centres_of_mass.push_back({0.2, 0.2, 0.2}); + expected_centres_of_mass.push_back({0.4, 0.4, 0.4}); + expected_centres_of_mass.push_back({0.5, 0.5, 0.5}); + expected_centres_of_mass.push_back({0.6, 0.6, 0.6}); + expected_centres_of_mass.push_back({0.7, 0.7, 0.7}); + expected_centres_of_mass.push_back({0.8, 0.8, 0.8}); + expected_centres_of_mass.push_back({0.9, 0.9, 0.9}); + for (int i = 0; i < grid_points_per_void_out.size(); i++) { + EXPECT_EQ(grid_points_per_void_out[i], expected_grid_points_per_void[i]); + for (int j = 0; j < 3; j++) { + EXPECT_DOUBLE_EQ(centres_of_mass_frac_out[i][j], + expected_centres_of_mass[i][j]); + } + } +} \ No newline at end of file diff --git a/baseline_indexer/tests/test_peaks_to_rlvs.cc b/baseline_indexer/tests/test_peaks_to_rlvs.cc new file mode 100644 index 0000000..a2dccef --- /dev/null +++ b/baseline_indexer/tests/test_peaks_to_rlvs.cc @@ -0,0 +1,90 @@ +#include +#include + +#include +#include + +#include "common.hpp" +#include "peaks_to_rlvs.cc" + +using Eigen::Matrix3d; +using Eigen::Vector3d; + +TEST(BaselineIndexer, peaks_to_rlvs_test) { + // peaks_to_rlvs transforms the fractional centres of mass on the FFT grid + // back into basis vectors in reciprocal space. + // The origin of space is at fractional coordinate (0.5, 0.5, 0.5). + std::vector grid_points_per_void; + std::vector centres_of_mass_frac; + grid_points_per_void.push_back(8); + grid_points_per_void.push_back(10); + grid_points_per_void.push_back(10); + centres_of_mass_frac.push_back( + {0.75, 0.75, 0.75}); // rlp = (64.0, 64.0, 64.0) = 110.85A + centres_of_mass_frac.push_back( + {0.1, 0.1, 0.1}); // rlp = (25.6, 25.6,25.6) = 44.34A + centres_of_mass_frac.push_back( + {0.4, 0.4, 0.4}); // rlp = (102.4, 102.4,102.4) = 177.36A + double dmin = 2.0; + double min_cell = 3.0; + double max_cell = 100.0; + uint32_t n_points = 256; + std::vector unique = peaks_to_rlvs( + centres_of_mass_frac, grid_points_per_void, dmin, min_cell, max_cell, n_points); + // Results should be sorted by grid points per void in descending order + // Equivalent multiples are not filtered if the grid points per void are equal. + EXPECT_EQ(unique.size(), 3); + for (int i = 0; i < 3; i++) { + EXPECT_DOUBLE_EQ(unique[0][i], 25.6); // point at 0.1,0.1,0.1 + } + for (int i = 0; i < 3; i++) { + EXPECT_DOUBLE_EQ(unique[1][i], 102.4); // point at 0.4,0.4,0.4 + } + for (int i = 0; i < 3; i++) { + EXPECT_DOUBLE_EQ(unique[2][i], -64.0); // point at 0.75,0.75,0.75 + } + + grid_points_per_void[1] = 11; // This causes the third to + // get filtered as an equivalent to the second + std::vector unique2 = peaks_to_rlvs( + centres_of_mass_frac, grid_points_per_void, dmin, min_cell, max_cell, n_points); + EXPECT_EQ(unique2.size(), 2); + for (int i = 0; i < 3; i++) { + EXPECT_DOUBLE_EQ(unique2[0][i], 25.6); // point at 0.1,0.1,0.1 + } + for (int i = 0; i < 3; i++) { + EXPECT_DOUBLE_EQ(unique2[1][i], -64.0); // point at 0.75,0.75,0.75 + } + + // now check grouping based on length and angle - second and third should be combined + centres_of_mass_frac[1] = {0.6, 0.6, 0.6}; // i.e. inverse pair to index 2 + centres_of_mass_frac[2] = { + 0.405, 0.405, 0.405}; // similar but not exactly equal to index 1 + grid_points_per_void[1] = + 10; // i.e. wouldn't get filtered out by approx multiple filter + std::vector unique3 = peaks_to_rlvs( + centres_of_mass_frac, grid_points_per_void, dmin, min_cell, max_cell, n_points); + EXPECT_EQ(unique3.size(), 2); + for (int i = 0; i < 3; i++) { + EXPECT_DOUBLE_EQ( + unique3[0][i], + -103.04); // mean of point at 0.6,0.6,0.6 and 0.405,0.405,0.405 + } + for (int i = 0; i < 3; i++) { + EXPECT_DOUBLE_EQ(unique3[1][i], -64.0); // point at 0.75,0.75,0.75 + } + + // check min and max cell filters + // the three points are at d-values of 44.34, 177.36 and 110.85 + // set min_cell to 50 and max_cell to 80, to just leave the 110.85 solution (yes the filter in the + // dials code is cell < 2 * max_cell...). This could be changed here to make sense. + double min_cell2 = 50.0; + double max_cell2 = 80.0; + centres_of_mass_frac[2] = {0.4, 0.4, 0.4}; + std::vector unique4 = peaks_to_rlvs( + centres_of_mass_frac, grid_points_per_void, dmin, min_cell2, max_cell2, n_points); + EXPECT_EQ(unique4.size(), 1); + for (int i = 0; i < 3; i++) { + EXPECT_DOUBLE_EQ(unique4[0][i], -64.0); // rlp = (64.0, 64.0, 64.0) = 110.85A + } +} \ No newline at end of file diff --git a/baseline_indexer/tests/test_xyz_to_rlp.cc b/baseline_indexer/tests/test_xyz_to_rlp.cc new file mode 100644 index 0000000..24a3828 --- /dev/null +++ b/baseline_indexer/tests/test_xyz_to_rlp.cc @@ -0,0 +1,53 @@ +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "xyz_to_rlp.cc" + +using Eigen::Matrix3d; +using Eigen::Vector3d; +using json = nlohmann::json; + +TEST(BaselineIndexer, XyztoRlptest) { + // Test the xyz_to_rlp function. Compare output to the dials + // equivalent on the same data: centroid_px_to_mm plus + // map_centroids_to_reciprocal_space + std::vector xyzobs_px{{10.1, 10.1, 50.2, 20.1, 20.1, 70.2}}; + Scan scan{{1, 100}, {0.0, 0.1}}; //image range and oscillation. + Goniometer gonio{ + {{1.0, 0.0, 0.0}}, {{0.0}}, {{"phi"}}, 0}; //axes, angles, names, scan-axis. + json panel_data; + panel_data["fast_axis"] = {1.0, 0.0, 0.0}; + panel_data["slow_axis"] = {0.0, -1.0, 0.0}; + panel_data["origin"] = {-150, 162, -200}; + panel_data["pixel_size"] = {0.075, 0.075}; + panel_data["image_size"] = {4148, 4362}; + panel_data["trusted_range"] = {0.0, 46051}; + panel_data["type"] = std::string("SENSOR_PAD"); + panel_data["name"] = std::string("test"); + panel_data["raw_image_offset"] = {0, 0}; + panel_data["thickness"] = 0.45; + panel_data["material"] = "Si"; + panel_data["mu"] = 3.92; + panel_data["gain"] = 1.0; + panel_data["pedestal"] = 0.0; + json pxdata; + pxdata["type"] = std::string("ParallaxCorrectedPxMmStrategy"); + panel_data["px_mm_strategy"] = pxdata; + Panel panel{panel_data}; // use defaults + MonochromaticBeam beam{1.0}; //wavelength + std::vector rlp = xyz_to_rlp(xyzobs_px, panel, beam, scan, gonio); + // Check against the equivalent results from the dials calculation + Vector3d expected_0{{-0.5021752936083477, 0.5690514955867707, 0.27788051106787137}}; + Vector3d expected_1{{-0.5009709068399325, 0.5770958485799975, 0.2562207980973077}}; + for (int i = 0; i < 3; i++) { + EXPECT_DOUBLE_EQ(rlp[0][i], expected_0[i]); + EXPECT_DOUBLE_EQ(rlp[1][i], expected_1[i]); + } +} \ No newline at end of file diff --git a/baseline_indexer/xyz_to_rlp.cc b/baseline_indexer/xyz_to_rlp.cc new file mode 100644 index 0000000..6e55fb1 --- /dev/null +++ b/baseline_indexer/xyz_to_rlp.cc @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include + +#include + +using Eigen::Matrix3d; +using Eigen::Vector3d; + +/** + * @brief Transform detector pixel coordinates into reciprocal space coordinates. + * @param xyzobs_px A 1D array of detector pixel coordinates from a single panel. + * @param panel A dx2 Panel object defining the corresponding detector panel. + * @param beam A dx2 MonochromaticBeam object. + * @param scan A dx2 Scan object. + * @param gonio A dx2 Goniometer object. + * @returns A vector of reciprocal space coordinates. + */ +std::vector xyz_to_rlp(const std::vector &xyzobs_px, + const Panel &panel, + const MonochromaticBeam &beam, + const Scan &scan, + const Goniometer &gonio) { + // Use the experimental models to perform a coordinate transformation from + // pixel coordinates in detector space to reciprocal space, in units of + // inverse angstrom. + // An equivalent to dials flex_ext.map_centroids_to_reciprocal_space method + + constexpr double DEG2RAD = M_PI / 180.0; + + // xyzobs_px is a flattened array, we want to return a vector of Vector3ds, + // so the size is divided by 3. + std::vector rlp(xyzobs_px.size() / 3); + + // Extract the quantities from the models that are needed for the calculation. + Vector3d s0 = beam.get_s0(); + double wl = beam.get_wavelength(); + std::array oscillation = scan.get_oscillation(); + const auto [osc_start, osc_width] = scan.get_oscillation(); + int image_range_start = scan.get_image_range()[0]; + Matrix3d setting_rotation_inverse = gonio.get_setting_rotation().inverse(); + Matrix3d sample_rotation_inverse = gonio.get_sample_rotation().inverse(); + Vector3d rotation_axis = gonio.get_rotation_axis(); + Matrix3d d_matrix = panel.get_d_matrix(); + + for (int i = 0; i < rlp.size(); ++i) { + // first convert detector pixel positions into mm + int vec_idx = 3 * i; + double x1 = xyzobs_px[vec_idx]; + double x2 = xyzobs_px[vec_idx + 1]; + double x3 = xyzobs_px[vec_idx + 2]; + std::array xymm = panel.px_to_mm(x1, x2); + // convert the image 'z' coordinate to rotation angle based on the scan data + double rot_angle = + (((x3 + 1 - image_range_start) * osc_width) + osc_start) * DEG2RAD; + + // calculate the s1 vector using the detector d matrix + Vector3d m = {xymm[0], xymm[1], 1.0}; + Vector3d s1_i = d_matrix * m; + s1_i.normalize(); + // convert into inverse ansgtroms + Vector3d s1_this = s1_i / wl; + + // now apply the goniometer matrices + // see https://dials.github.io/documentation/conventions.html for full conventions + // rlp = F^-1 * R'^-1 * S^-1 * (s1-s0) + Vector3d S = setting_rotation_inverse * (s1_this - s0); + double cos = std::cos(-1.0 * rot_angle); + double sin = std::sin(-1.0 * rot_angle); + // The DIALS equivalent to the code below is + // rlp_this = S.rotate_around_origin(gonio.rotation_axis, -1.0 * rot_angle); + Vector3d rlp_this = (S * cos) + + (rotation_axis * rotation_axis.dot(S) * (1 - cos)) + + (sin * rotation_axis.cross(S)); + + rlp[i] = sample_rotation_inverse * rlp_this; + } + return rlp; +}