@@ -1249,128 +1249,22 @@ impl<'s> PackedSeqBase<'s, 1> {
12491249 let o = this. offset ;
12501250 assert ! ( o < Self :: C8 ) ;
12511251
1252- let num_kmers = if this. len == 0 {
1253- 0
1254- } else {
1255- ( this. len + o) . saturating_sub ( context - 1 )
1256- } ;
1257- // without +o, since we don't need them in the stride.
1258- let num_kmers_stride = this. len . saturating_sub ( context - 1 ) ;
1259- let n = num_kmers_stride. div_ceil ( L ) . next_multiple_of ( Self :: C8 ) ;
1260- let bytes_per_chunk = n / Self :: C8 ;
1261- let padding = Self :: C8 * L * bytes_per_chunk - num_kmers_stride;
1252+ let delay = k - 1 ;
12621253
1263- let offsets : [ usize ; 8 ] = from_fn ( |l| l * bytes_per_chunk ) ;
1254+ let it = self . par_iter_bp_delayed ( context , Delay ( delay ) ) ;
12641255
1265- // prev2 prev cur
1266- // 0..31 | 32..63
1267- // mask 00001111110000
1268- // mask 00000111111000
1269- // mask 00000011111100
1270- // mask 00000001111110
1271- // mask 00000000111111
1272- // cur next
1273- // 32..63| 64..95
1274- // mask 11111100000000
1275-
1276- // [prev2, prev, cur]
1277- let mut cur = [ S :: ZERO ; 4 ] ;
1278- let mut mask = [ S :: ZERO ; 4 ] ;
1279- match k {
1280- 1 ..=32 => {
1281- mask[ 3 ] = ( S :: MAX ) << S :: splat ( 32 - k as u32 ) ;
1282- }
1283- 33 ..=64 => {
1284- mask[ 3 ] = S :: MAX ;
1285- mask[ 2 ] = ( S :: MAX ) << S :: splat ( 64 - k as u32 ) ;
1286- }
1287- 65 ..=96 => {
1288- mask[ 3 ] = S :: MAX ;
1289- mask[ 2 ] = S :: MAX ;
1290- mask[ 1 ] = ( S :: MAX ) << S :: splat ( 96 - k as u32 ) ;
1291- }
1292- _ => unreachable ! ( ) ,
1293- }
1294-
1295- #[ inline( always) ]
1296- fn rotate_mask ( mask : & mut [ S ; 4 ] , lshift : & S , rshift : & S ) {
1297- let carry01 = mask[ 0 ] >> * rshift;
1298- let carry12 = mask[ 1 ] >> * rshift;
1299- let carry23 = mask[ 2 ] >> * rshift;
1300- mask[ 0 ] = mask[ 0 ] << * lshift;
1301- mask[ 1 ] = ( mask[ 1 ] << * lshift) | carry01;
1302- mask[ 2 ] = ( mask[ 2 ] << * lshift) | carry12;
1303- mask[ 3 ] = ( mask[ 3 ] << * lshift) | carry23;
1304- }
1256+ let mut cnt = u32x8:: ZERO ;
13051257
1306- // Boxed, so it doesn't consume precious registers.
1307- // Without this, cur is not always inlined into a register.
1308- let mut buf = IT_BUF . with_borrow_mut ( |v| RecycledBox ( v. pop ( ) ) ) ;
1309- buf. init_if_needed ( ) ;
1310-
1311- // We skip the first o iterations.
1312- let par_len = if num_kmers == 0 { 0 } else { n + k + o - 1 } ;
1313-
1314- let mut read = {
1315- #[ inline( always) ]
1316- move |i : usize , cur : & mut [ S ; 4 ] | {
1317- if i % Self :: C256 == 0 {
1318- // Read a u256 for each lane containing the next 128 characters.
1319- let data: [ u32x8 ; 8 ] = from_fn (
1320- #[ inline( always) ]
1321- |lane| read_slice_32 ( this. seq , offsets[ lane] + ( i / Self :: C8 ) ) ,
1322- ) ;
1323- * buf. get_mut ( ) = transpose ( data) ;
1324- }
1325- cur[ 0 ] = cur[ 1 ] ;
1326- cur[ 1 ] = cur[ 2 ] ;
1327- cur[ 2 ] = cur[ 3 ] ;
1328- cur[ 3 ] = buf. get ( ) [ ( i % Self :: C256 ) / Self :: C32 ] ;
1329- }
1330- } ;
1331-
1332- // Precompute the first o+skip iterations.
1333- let mut to_skip = o + skip;
1334- let mut i = 0 ;
1335- let lshift = S :: splat ( to_skip as u32 ) ;
1336- let rshift = S :: splat ( ( 32 - ( to_skip % 32 ) ) as u32 ) ;
1337- while to_skip > 0 {
1338- read ( i, & mut cur) ;
1339- i += 32 ;
1340- if to_skip >= 32 {
1341- to_skip -= 32 ;
1342- } else {
1343- mask[ 0 ] = mask[ 1 ] ;
1344- mask[ 1 ] = mask[ 2 ] ;
1345- mask[ 2 ] = mask[ 3 ] ;
1346- mask[ 3 ] = S :: ZERO ;
1347- // rotate mask by remainder
1348- rotate_mask ( & mut mask, & lshift, & rshift) ;
1349- break ;
1350- }
1351- }
1352-
1353- let lshift = S :: ONE ;
1354- let rshift = S :: splat ( 31 ) ;
1355-
1356- let it = ( o + skip..par_len) . map (
1258+ let it = it. map (
13571259 #[ inline( always) ]
1358- move |i| {
1359- if i % Self :: C32 == 0 {
1360- read ( i, & mut cur) ;
1361- mask[ 0 ] = mask[ 1 ] ;
1362- mask[ 1 ] = mask[ 2 ] ;
1363- mask[ 2 ] = mask[ 3 ] ;
1364- mask[ 3 ] = S :: ZERO ;
1365- }
1366-
1367- rotate_mask ( & mut mask, & lshift, & rshift) ;
1368- !( ( cur[ 0 ] & mask[ 0 ] ) | ( cur[ 1 ] & mask[ 1 ] ) | ( cur[ 2 ] & mask[ 2 ] ) | ( cur[ 3 ] & mask[ 3 ] ) )
1369- . cmp_eq ( S :: ZERO )
1260+ move |( a, r) | {
1261+ cnt += a;
1262+ let out = cnt. cmp_gt ( S :: ZERO ) ;
1263+ cnt -= r;
1264+ out
13701265 } ,
13711266 ) ;
1372-
1373- PaddedIt { it, padding }
1267+ it. advance ( skip)
13741268 }
13751269}
13761270
0 commit comments