Skip to content

Commit

Permalink
Merge branch 'fix/ubsan' into devel
Browse files: browse the repository at this point in the history
  • Loading branch information
DominikRafacz committed Jul 11, 2021
2 parents f801c89 + 82b809e commit d64ac3f
Show file tree
Hide file tree
Showing 12 changed files with 255 additions and 580 deletions.
2 changes: 1 addition & 1 deletion inst/include/tidysq/Alphabet.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ namespace tidysq {

static std::unordered_map<LetterValue, const Letter> prepare_value_to_letter(const std::vector<Letter> &letters) {
std::unordered_map<LetterValue, const Letter> ret{};
for (int i = 0; i < letters.size(); i++) {
for (unsigned short i = 0; i < letters.size(); i++) {
if (letters[i].empty())
throw std::invalid_argument("each \"letter\" has to have at least one character!");
ret.insert({i, letters[i]});
Expand Down
2 changes: 1 addition & 1 deletion inst/include/tidysq/ProtoSq.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ namespace tidysq {

inline bool operator==(const ProtoSq<INTERNAL, PROTO> &other) const {
if ((alphabet_ != other.alphabet_) || (content_.size() != other.content_.size())) return false;
for (LenSq i = 0; i < content_.size(); i++) {
for (typeof(content_.size()) i = 0; i < content_.size(); i++) {
if ((*this)[i] != other[i]) return false;
}
return true;
Expand Down
98 changes: 44 additions & 54 deletions inst/include/tidysq/internal/pack.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ namespace tidysq::internal {
const Alphabet &alphabet) {
LenSq out_byte = 0;
auto interpreter = unpacked.template content_interpreter<SIMPLE>(alphabet);
LetterValue v1, v2, v3, v4;
while (!interpreter.reached_end()) {
packed(out_byte) = (interpreter.get_next_value() ) |
(interpreter.get_next_value() << 2u) |
(interpreter.get_next_value() << 4u) |
(interpreter.get_next_value() << 6u) ;
v1 = interpreter.get_next_value();
v2 = interpreter.get_next_value() << 2u;
v3 = interpreter.get_next_value() << 4u;
v4 = interpreter.get_next_value() << 6u;
packed(out_byte) = v1 | v2 | v3 | v4 ;
++out_byte;
}
packed.trim(interpreter.interpreted_letters(), alphabet);
Expand All @@ -30,27 +32,23 @@ namespace tidysq::internal {
const Alphabet &alphabet) {
LenSq out_byte = 0;
auto interpreter = unpacked.template content_interpreter<SIMPLE>(alphabet);
LetterValue tmp;
LetterValue v1, v2, v3, v4;
while (!interpreter.reached_end()) {
packed(out_byte) = (interpreter.get_next_value() ) |
(interpreter.get_next_value() << 3u) ;
tmp = interpreter.get_next_value() ;
packed(out_byte) |= (tmp << 6u) ;

v1 = interpreter.get_next_value();
v2 = interpreter.get_next_value() << 3u;
v3 = interpreter.get_next_value();
packed(out_byte) = v1 | v2 | (v3 << 6u) ;
if (++out_byte == packed.size()) break;

packed(out_byte) = (tmp >> 2u) |
(interpreter.get_next_value() << 1u) |
(interpreter.get_next_value() << 4u) ;
tmp = interpreter.get_next_value() ;
packed(out_byte) |= (tmp << 7u) ;

v1 = interpreter.get_next_value() << 1u;
v2 = interpreter.get_next_value() << 4u;
v4 = interpreter.get_next_value();
packed(out_byte) = (v3 >> 2u) | v1 | v2 | (v4 << 7u) ;
if (++out_byte == packed.size()) break;

packed(out_byte) = (tmp >> 1u) |
(interpreter.get_next_value() << 2u) |
(interpreter.get_next_value() << 5u) ;

v1 = interpreter.get_next_value() << 2u;
v2 = interpreter.get_next_value() << 5u;
packed(out_byte) = (v4 >> 1u) | v1 | v2 ;
++out_byte;
}
packed.trim(interpreter.interpreted_letters(), alphabet);
Expand All @@ -63,9 +61,11 @@ namespace tidysq::internal {
const Alphabet &alphabet) {
LenSq out_byte = 0;
auto interpreter = unpacked.template content_interpreter<SIMPLE>(alphabet);
LetterValue v1, v2;
while (!interpreter.reached_end()) {
packed(out_byte) = (interpreter.get_next_value() ) |
(interpreter.get_next_value() << 4u) ;
v1 = interpreter.get_next_value();
v2 = interpreter.get_next_value() << 4u;
packed(out_byte) = v1 | v2 ;
++out_byte;
}
packed.trim(interpreter.interpreted_letters(), alphabet);
Expand All @@ -77,36 +77,29 @@ namespace tidysq::internal {
const Alphabet &alphabet) {
LenSq out_byte = 0;
auto interpreter = unpacked.template content_interpreter<SIMPLE>(alphabet);
LetterValue tmp;
LetterValue v1, v2, v3;
while (!interpreter.reached_end()) {
packed(out_byte) = (interpreter.get_next_value() ) ;
tmp = interpreter.get_next_value() ;
packed(out_byte) |= (tmp << 5u) ;

v1 = interpreter.get_next_value();
v2 = interpreter.get_next_value();
packed(out_byte) = v1 | (v2 << 5u) ;
if (++out_byte == packed.size()) break;

packed(out_byte) = (tmp >> 3u) |
(interpreter.get_next_value() << 2u) ;
tmp = interpreter.get_next_value() ;
packed(out_byte) |= (tmp << 7u) ;

v1 = interpreter.get_next_value() << 2u;
v3 = interpreter.get_next_value();
packed(out_byte) = (v2 >> 3u) | v1 | (v3 << 7u) ;
if (++out_byte == packed.size()) break;

packed(out_byte) = (tmp >> 1u) ;
tmp = interpreter.get_next_value() ;
packed(out_byte) |= (tmp << 4u) ;

v1 = interpreter.get_next_value();
packed(out_byte) = (v3 >> 1u) | (v1 << 4u) ;
if (++out_byte == packed.size()) break;

packed(out_byte) = (tmp >> 4u) |
(interpreter.get_next_value() << 1u) ;
tmp = interpreter.get_next_value() ;
packed(out_byte) |= (tmp << 6u) ;

v2 = interpreter.get_next_value() << 1u;
v3 = interpreter.get_next_value();
packed(out_byte) = (v1 >> 4u) | v2 | (v3 << 6u) ;
if (++out_byte == packed.size()) break;

packed(out_byte) = (tmp >> 2u) |
(interpreter.get_next_value() << 3u) ;
v1 = interpreter.get_next_value() << 3u;
packed(out_byte) = (v3 >> 2u) | v1 ;
++out_byte;
}
packed.trim(interpreter.interpreted_letters(), alphabet);
Expand All @@ -118,22 +111,19 @@ namespace tidysq::internal {
const Alphabet &alphabet) {
LenSq out_byte = 0;
auto interpreter = unpacked.template content_interpreter<SIMPLE>(alphabet);
LetterValue tmp;
LetterValue v1, v2;
while (!interpreter.reached_end()) {
packed(out_byte) = (interpreter.get_next_value() ) ;
tmp = interpreter.get_next_value() ;
packed(out_byte) |= (tmp << 6u) ;

v1 = interpreter.get_next_value();
v2 = interpreter.get_next_value();
packed(out_byte) = v1 | (v2 << 6u);
if (++out_byte == packed.size()) break;

packed(out_byte) = (tmp >> 2u) ;
tmp = interpreter.get_next_value() ;
packed(out_byte) |= (tmp << 4u) ;

v1 = interpreter.get_next_value();
packed(out_byte) = (v2 >> 2u) | (v1 << 4u) ;
if (++out_byte == packed.size()) break;

packed(out_byte) = (tmp >> 4u) |
(interpreter.get_next_value() << 2u) ;
v2 = interpreter.get_next_value() << 2u;
packed(out_byte) = (v1 >> 4u) | v2 ;
++out_byte;
}
packed.trim(interpreter.interpreted_letters(), alphabet);
Expand Down
4 changes: 2 additions & 2 deletions inst/include/tidysq/io/write_fasta.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace tidysq {
}

void write_sequence_part(const std::string &content,
LenSq &written) {
u_LenSq &written) {
// if there is more to be written than content size, write only part of it
if (content.size() - written >= width_) {
stream_.write(content.data() + written, width_);
Expand All @@ -36,7 +36,7 @@ namespace tidysq {
void write_sequence(LenSq i) {
ProtoSequence<STD_IT, STRING_PT> unpacked = unpack<INTERNAL, STD_IT, STRING_PT>(sq_[i], sq_.alphabet());
const std::string &content = unpacked.content();
LenSq written = 0;
u_LenSq written = 0;

while (written < content.size()) {
write_sequence_part(content, written);
Expand Down
14 changes: 6 additions & 8 deletions inst/include/tidysq/ops/remove_ambiguous.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,15 @@ namespace tidysq {
namespace ops {
template<typename INTERNAL_IN, typename INTERNAL_OUT = INTERNAL_IN>
class OperationRemoveAmbiguous : public OperationRemoveOnCondition<INTERNAL_IN, INTERNAL_OUT> {
bool early_return_;

Alphabet match_dest_alph(const Alphabet &alphabet) {
switch (alphabet.type()) {
case AMI_BSC:
early_return_ = true;
case AMI_EXT:
return Alphabet(AMI_BSC);
case DNA_BSC:
early_return_ = true;
case DNA_EXT:
return Alphabet(DNA_BSC);
case RNA_BSC:
early_return_ = true;
case RNA_EXT:
return Alphabet(RNA_BSC);
default:
Expand All @@ -28,9 +23,13 @@ namespace tidysq {
}

bool may_return_early(const Sq<INTERNAL_IN> &vector_in) override {
return early_return_;
SqType type = this->alph_.type();
return type == AMI_BSC || type == DNA_BSC || type == RNA_BSC;
}

// Early-exit result: hands back the input vector itself, with no per-element
// processing. Any conversion from Sq<INTERNAL_IN> to Sq<INTERNAL_OUT> happens
// implicitly through the return type.
// NOTE(review): unlike may_return_early, this is not marked `override` --
// confirm the base class declares a matching virtual so this version is the
// one actually invoked.
Sq<INTERNAL_OUT> return_early(const Sq<INTERNAL_IN> &vector_in) {
return vector_in;
}


public:
Expand All @@ -44,8 +43,7 @@ namespace tidysq {
this->OperationRemoveOnCondition<INTERNAL_IN, INTERNAL_OUT>::alph_[value]) ||
this->OperationRemoveOnCondition<INTERNAL_IN, INTERNAL_OUT>::alph_.NA_value() == value;
},
by_letter),
early_return_(false) {};
by_letter) {};
};
}

Expand Down
2 changes: 1 addition & 1 deletion inst/include/tidysq/paste.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ namespace tidysq {
element_count += sq[0].get().original_length();
}
} else {
for (LenSq i = 0; i < element_counts.size(); ++i) {
for (u_LenSq i = 0; i < element_counts.size(); ++i) {
element_counts[i] += sq[i].get().original_length();
}
}
Expand Down
2 changes: 1 addition & 1 deletion inst/include/tidysq/sqapply.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace tidysq {
ops::OperationVectorToVector<VECTOR_IN, ELEMENT_IN, VECTOR_OUT, ELEMENT_OUT> &operation) {
if (operation.may_return_early(vector_in)) return operation.return_early(vector_in);
VECTOR_OUT ret = operation.initialize_vector_out(vector_in);
for (LenSq i = 0; i < vector_in.size(); i++) {
for (typeof(vector_in.size()) i = 0; i < vector_in.size(); i++) {
ret[i] = operation(vector_in[i]);
}
return ret;
Expand Down
1 change: 1 addition & 0 deletions inst/include/tidysq/tidysq-typedefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

namespace tidysq {
// Signed length/index type, aliased to R's R_xlen_t.
typedef R_xlen_t LenSq;
// Unsigned length type for counters that are compared against unsigned
// container sizes (e.g. the result of .size()), avoiding signed/unsigned
// comparisons.
// NOTE(review): `unsigned long` is only 32 bits wide on LLP64 platforms
// (e.g. 64-bit Windows), which is presumably narrower than R_xlen_t there --
// confirm it can hold every size it is compared with.
typedef unsigned long int u_LenSq;
// One-byte element types (unsigned char).
typedef unsigned char ElementPacked;
typedef unsigned char ElementRaws;
// Unsigned short element type.
typedef unsigned short int ElementInts;
Expand Down
Loading

0 comments on commit d64ac3f

Please sign in to comment.