|
| 1 | +use wide::{CmpLt, i8x32}; |
| 2 | + |
| 3 | +use crate::packed_seq::read_slice_32; |
| 4 | + |
1 | 5 | use super::*; |
2 | 6 |
|
3 | 7 | #[derive(Clone, Copy, Debug, MemSize, MemDbg)] |
@@ -70,4 +74,32 @@ impl PackedNSeqVec { |
70 | 74 | ambiguous: BitSeqVec::from_ascii(seq), |
71 | 75 | } |
72 | 76 | } |
| 77 | + |
| 78 | + /// Create a mask that is 1 for all non-ACGT bases and for all low-quality bases with quality `<threshold`. |
| 79 | + pub fn from_ascii_and_quality(seq: &[u8], quality: &[u8], threshold: usize) -> Self { |
| 80 | + assert_eq!(seq.len(), quality.len()); |
| 81 | + |
| 82 | + let mut ambiguous = BitSeqVec::from_ascii(seq); |
| 83 | + |
| 84 | + // Low-quality bases are also ambiguous. |
| 85 | + { |
| 86 | + let t = (b'!' + threshold as u8) as i8; |
| 87 | + let t_simd = i8x32::splat(t); |
| 88 | + let ambiguous = ambiguous.seq.as_chunks_mut::<4>().0; |
| 89 | + for i in (0..quality.len()).step_by(32) { |
| 90 | + let chunk = i8x32::from(unsafe { |
| 91 | + std::mem::transmute::<_, i8x32>(read_slice_32(quality, i)) |
| 92 | + }); |
| 93 | + |
| 94 | + let mask = t_simd.cmp_lt(chunk).move_mask() as u32; |
| 95 | + let ambi = &mut ambiguous[i / 32]; |
| 96 | + *ambi = (u32::from_ne_bytes(*ambi) | mask).to_ne_bytes(); |
| 97 | + } |
| 98 | + } |
| 99 | + |
| 100 | + Self { |
| 101 | + seq: PackedSeqVec::from_ascii(seq), |
| 102 | + ambiguous, |
| 103 | + } |
| 104 | + } |
73 | 105 | } |
0 commit comments