diff --git a/Makefile b/Makefile index 63dcd73..c247518 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ IOMP5 = 0 ########################### end ########################### -VERSION=0.4.5 +VERSION=0.4.6 # detect OS architecture and add flags Platform := $(shell uname -s) diff --git a/src/Common.hpp b/src/Common.hpp index 12b3224..4a2d08d 100644 --- a/src/Common.hpp +++ b/src/Common.hpp @@ -1,10 +1,22 @@ +/******************************************************************************* + * @file https://github.com/Zilong-Li/PCAone/src/Common.hpp + * @author Zilong Li + * Copyright (C) 2022-2024. Use of this code is governed by the LICENSE file. + ******************************************************************************/ #ifndef PCAone_Common_H #define PCAone_Common_H #include +#include #include #include #include +#include +#include +#include + +#define ZSTD_STATIC_LINKING_ONLY +#include "zstd.h" #define MAF(a) ((a) > 0.5 ? (1 - a) : (a)) @@ -34,5 +46,51 @@ inline UMapIntInt vector2map(const Int1D& v) { return m; } +template +inline std::vector sortidx(const std::vector& v) { + std::vector idx(v.size()); + std::iota(idx.begin(), idx.end(), 0); + std::sort(idx.begin(), idx.end(), + [&v](size_t i1, size_t i2) { return v[i1] < v[i2]; }); + return idx; +} + +struct Line { + std::string data; + operator std::string const &() const { return data; } + friend std::istream& operator>>(std::istream& ifs, Line& line) { + return std::getline(ifs, line.data); + } +}; + +struct SNPld { + std::vector pos; // pos of each SNP + std::vector end_pos; // 0-based index for last snp pos + std::vector chr; // chr sequences + Double1D af; // allele frequency + std::vector ws; // the snp index, i.e the index for lead SNP + std::vector we; // the number of SNPs (including lead SNP) in a window +}; + +struct ZstdBuffer { + ZstdBuffer() { + buffInTmp.reserve(buffInSize); + buffOutTmp.reserve(buffOutSize); + } + ~ZstdBuffer() { + ZSTD_freeDCtx(dctx); + if (fclose(fin)) { + perror("fclose error"); + exit(1); + } + } + FILE* fin = nullptr; + size_t const buffInSize = ZSTD_DStreamInSize(); + size_t const buffOutSize = ZSTD_DStreamOutSize(); + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + size_t lastRet = 1; + std::string buffCur = ""; + std::string buffLine, buffInTmp, buffOutTmp; +}; #endif // PCAone_Common_H diff --git a/src/Data.cpp b/src/Data.cpp index 038513b..161f803 100644 --- a/src/Data.cpp +++ b/src/Data.cpp @@ -6,9 +6,6 @@ #include "Data.hpp" -#include -#include - #include "Eigen/src/Core/util/Meta.h" #include "LD.hpp" #include "Utils.hpp" @@ -215,8 +212,8 @@ void Data::write_residuals(const MyVector &S, const MyMatrix &U, "ld-stats=0: calculate the ancestry adjusted LD matrix!"); } std::ofstream ofs(params.fileout + ".residuals", std::ios::binary); - const uint ibyte = 4; - const uint magic = ibyte * 2; + const uint64 ibyte = 4; + const uint64 magic = ibyte * 2; uint64 bytes_per_snp = nsamples * ibyte; ofs.write((char *)&nsnps, ibyte); ofs.write((char *)&nsamples, ibyte); @@ -229,7 +226,7 @@ void Data::write_residuals(const MyVector &S, const MyMatrix &U, for (Eigen::Index ib = 0; ib < G.cols(); ib++) { fg = G.col(ib).cast(); if (params.perm) { - idx = magic + perm.indices()[ib] * bytes_per_snp; + idx = magic + (uint64)perm.indices()[ib] * bytes_per_snp; ofs.seekp(idx, std::ios_base::beg); } ofs.write((char *)fg.data(), bytes_per_snp); @@ -246,7 +243,7 @@ void Data::write_residuals(const MyVector &S, const MyMatrix &U, for (Eigen::Index ib = 0; ib <= stop[b] - start[b]; ib++, i++) { fg = G.col(ib).cast(); if (params.perm) { - idx = magic + perm.indices()[i] * bytes_per_snp; + idx = magic + (uint64)perm.indices()[i] * bytes_per_snp; ofs.seekp(idx, std::ios_base::beg); } ofs.write((char *)fg.data(), bytes_per_snp); diff --git a/src/FileBinary.cpp b/src/FileBinary.cpp index 4cb5859..d71f192 100644 --- a/src/FileBinary.cpp +++ b/src/FileBinary.cpp @@ -2,21 +2,6 @@ using namespace std; -bool isZstdCompressed(const char *filename) { - FILE *file = fopen(filename, "rb"); - if (!file) return false; - - char magicNumber[4]; - if (fread(magicNumber, 1, 4, file) != 4) { - fclose(file); - return false; - } - - bool isCompressed = (ZSTD_isFrame(magicNumber, 4) != 0); - - fclose(file); - return isCompressed; -} void FileBin::check_file_offset_first_var() { setlocale(LC_ALL, "C"); diff --git a/src/FileBinary.hpp b/src/FileBinary.hpp index 038c469..89a9680 100644 --- a/src/FileBinary.hpp +++ b/src/FileBinary.hpp @@ -2,10 +2,7 @@ #define FILEBINARY_H_ #include "Data.hpp" -#define ZSTD_STATIC_LINKING_ONLY -#include "zstd.h" -bool isZstdCompressed(const char *filename); class FileBin : public Data { public: diff --git a/src/FilePlink.cpp b/src/FilePlink.cpp index 8fbbfb4..d54d959 100644 --- a/src/FilePlink.cpp +++ b/src/FilePlink.cpp @@ -6,8 +6,6 @@ #include "FilePlink.hpp" -#include - using namespace std; void FileBed::check_file_offset_first_var() { diff --git a/src/LD.cpp b/src/LD.cpp index 1f6a600..3df1b89 100644 --- a/src/LD.cpp +++ b/src/LD.cpp @@ -8,10 +8,6 @@ #include -#include -#include -#include - #include "Cmd.hpp" #include "Data.hpp" #include "Utils.hpp" diff --git a/src/Timer.hpp b/src/Timer.hpp index 2f3ca54..f22bcb0 100644 --- a/src/Timer.hpp +++ b/src/Timer.hpp @@ -1,6 +1,7 @@ #ifndef TIMER_H_ #define TIMER_H_ +#include #include #include // put_time #include diff --git a/src/Utils.cpp b/src/Utils.cpp index 8c949ef..a2adf11 100644 --- a/src/Utils.cpp +++ b/src/Utils.cpp @@ -223,3 +223,19 @@ void make_plink2_eigenvec_file(int K, std::string fout, const std::string& fin, ofs << tokens[0] + "\t" + tokens[1] + "\t" << line2 << std::endl; } } + +bool isZstdCompressed(const char *filename) { + FILE *file = fopen(filename, "rb"); + if (!file) return false; + + char magicNumber[4]; + if (fread(magicNumber, 1, 4, file) != 4) { + fclose(file); + return false; + } + + bool isCompressed = (ZSTD_isFrame(magicNumber, 4) != 0); + + fclose(file); + return isCompressed; +} diff --git a/src/Utils.hpp b/src/Utils.hpp index 408673a..ba968a4 100644 --- a/src/Utils.hpp +++ b/src/Utils.hpp @@ -3,23 +3,18 @@ #include -#include #include -#include -#include #include #include #include #include #include -#include #include #include #include "Common.hpp" #include "Logger.hpp" #include "Timer.hpp" -#include "zstd.h" // MAKE SOME TOOLS FULLY ACCESSIBLE THROUGHOUT THE SOFTWARE #ifdef _DECLARE_TOOLBOX_HERE @@ -30,32 +25,6 @@ extern Timer tick; extern Logger cao; #endif -template -inline std::vector sortidx(const std::vector& v) { - std::vector idx(v.size()); - std::iota(idx.begin(), idx.end(), 0); - std::sort(idx.begin(), idx.end(), - [&v](size_t i1, size_t i2) { return v[i1] < v[i2]; }); - return idx; -} - -struct Line { - std::string data; - operator std::string const &() const { return data; } - friend std::istream& operator>>(std::istream& ifs, Line& line) { - return std::getline(ifs, line.data); - } -}; - -struct SNPld { - std::vector pos; // pos of each SNP - std::vector end_pos; // 0-based index for last snp pos - std::vector chr; // chr sequences - Double1D af; // allele frequency - std::vector ws; // the snp index, i.e the index for lead SNP - std::vector we; // the number of SNPs (including lead SNP) in a window -}; - std::string get_machine(); void fcloseOrDie(FILE* file); @@ -91,22 +60,6 @@ std::vector split_string(const std::string& s, void make_plink2_eigenvec_file(int K, std::string fout, const std::string& fin, const std::string& fam); -struct ZstdBuffer { - ZstdBuffer() { - buffInTmp.reserve(buffInSize); - buffOutTmp.reserve(buffOutSize); - } - ~ZstdBuffer() { - ZSTD_freeDCtx(dctx); - fcloseOrDie(fin); - } - FILE* fin = nullptr; - size_t const buffInSize = ZSTD_DStreamInSize(); - size_t const buffOutSize = ZSTD_DStreamOutSize(); - ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - size_t lastRet = 1; - std::string buffCur = ""; - std::string buffLine, buffInTmp, buffOutTmp; -}; +bool isZstdCompressed(const char *filename); #endif // PCAONE_UTILES_