Skip to content

Commit 0f4a930

Browse files
authored
Match FoF halos (#191)
* Match fof halos * Update script
1 parent 8f40689 commit 0f4a930

File tree

3 files changed

+178
-63
lines changed

3 files changed

+178
-63
lines changed

misc/match_group_membership.py

Lines changed: 102 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
--snap-basename2 SNAP_BASENAME2 \
1414
--membership-basename1 MEMBERSHIP_BASENAME1 \
1515
--membership-basename2 MEMBERSHIP_BASENAME2 \
16-
--soap-filename1 SOAP_FILENAME1 \
17-
--soap-filename2 SOAP_FILENAME2 \
16+
--catalogue-filename1 CATALOGUE_FILENAME1 \
17+
--catalogue-filename2 CATALOGUE_FILENAME2 \
1818
--output-filename OUTPUT_FILENAME
1919
2020
Run "python misc/match_group_membership.py -h" for a discription
@@ -39,7 +39,7 @@
3939
from virgo.mpi.gather_array import gather_array
4040

4141

42-
def load_particle_data(snap_basename, membership_basename, ptypes, comm):
42+
def load_particle_data(snap_basename, membership_basename, ptypes, match_fof, comm):
4343
"""
4444
Load the particle IDs and halo membership for the particle types
4545
we will use to match. Removes unbound particles.
@@ -64,8 +64,14 @@ def load_particle_data(snap_basename, membership_basename, ptypes, comm):
6464
)
6565
halo_catalogue_idx, rank_bound = [], []
6666
for ptype in ptypes:
67-
halo_catalogue_idx.append(file.read(f"PartType{ptype}/GroupNr_bound"))
68-
rank_bound.append(file.read(f"PartType{ptype}/Rank_bound"))
67+
if match_fof:
68+
halo_catalogue_idx.append(file.read(f"PartType{ptype}/FOFGroupIDs"))
69+
# Use particle IDs to decide which particles to keep as most bound,
70+
# which should be nearly random
71+
rank_bound.append(file.read(f"PartType{ptype}/ParticleIDs"))
72+
else:
73+
halo_catalogue_idx.append(file.read(f"PartType{ptype}/GroupNr_bound"))
74+
rank_bound.append(file.read(f"PartType{ptype}/Rank_bound"))
6975
halo_catalogue_idx = np.concatenate(halo_catalogue_idx)
7076
rank_bound = np.concatenate(rank_bound)
7177

@@ -85,19 +91,24 @@ def load_particle_data(snap_basename, membership_basename, ptypes, comm):
8591
}
8692

8793

88-
def load_soap(soap_filename, comm):
94+
def load_catalogue(catalogue_filename, match_fof, comm):
8995
"""
90-
Loads the required fields from a SOAP catalogue
96+
Loads the required fields from a halo catalogue (SOAP/FOF)
9197
"""
9298
# Load particle IDs
9399
file = phdf5.MultiFile(
94-
soap_filename, file_nr_attr=("Header", "NumFilesPerSnapshot"), comm=comm
100+
catalogue_filename, file_nr_attr=("Header", "NumFilesPerSnapshot"), comm=comm
95101
)
96-
return {
97-
"halo_catalogue_idx": file.read(f"InputHalos/HaloCatalogueIndex"),
98-
"host_halo_idx": file.read(f"SOAP/HostHaloIndex"),
99-
"is_central": file.read(f"InputHalos/IsCentral") == 1,
100-
}
102+
if match_fof:
103+
return {
104+
"halo_catalogue_idx": file.read(f"Groups/GroupIDs"),
105+
}
106+
else:
107+
return {
108+
"halo_catalogue_idx": file.read(f"InputHalos/HaloCatalogueIndex"),
109+
"host_halo_idx": file.read(f"SOAP/HostHaloIndex"),
110+
"is_central": file.read(f"InputHalos/IsCentral") == 1,
111+
}
101112

102113

103114
def match_sim(
@@ -106,42 +117,45 @@ def match_sim(
106117
rank_bound,
107118
particle_ids_to_match,
108119
particle_halo_ids_to_match,
109-
soap,
110-
soap_to_match,
120+
catalogue,
121+
catalogue_to_match,
111122
):
112123
"""
113124
Input:
114125
- particle_ids, particle_halo_ids, and rank_bound from simulation 1
115126
- particle_ids_to_match, and particle_halo_ids_to_match from simulation 2
116-
- soap catalogue from simulation 1
117-
- soap_to_match catalogue from simulation 2
127+
- catalogue from simulation 1
128+
- catalogue_to_match from simulation 2
118129
119130
Returns halo_ids, matched_halo_ids, n_match
120131
"""
121132

122-
if args.centrals_only:
133+
if (not args.match_fof) and (not args.match_satellites):
123134
# Only keep particles in simulation 1 which are bound to a central
124135
idx = psort.parallel_match(
125-
particle_halo_ids, soap["halo_catalogue_idx"], comm=comm
136+
particle_halo_ids, catalogue["halo_catalogue_idx"], comm=comm
126137
)
127138
assert np.all(idx >= 0), "Some subhalos could not be found"
128-
particle_is_cen = psort.fetch_elements(soap["is_central"], idx, comm=comm)
139+
particle_is_cen = psort.fetch_elements(catalogue["is_central"], idx, comm=comm)
129140
particle_ids = particle_ids[particle_is_cen]
130141
particle_halo_ids = particle_halo_ids[particle_is_cen]
131142
rank_bound = rank_bound[particle_is_cen]
132143

133-
# Replace satellite halo_ids of particles in simulation 2 with their host halo_id
144+
# Replace satellite halo_ids of particles in simulation 2
145+
# with their host halo_id
134146
idx = psort.parallel_match(
135-
particle_halo_ids_to_match, soap_to_match["halo_catalogue_idx"], comm=comm
147+
particle_halo_ids_to_match,
148+
catalogue_to_match["halo_catalogue_idx"],
149+
comm=comm,
136150
)
137151
is_sat = np.logical_not(
138-
psort.fetch_elements(soap_to_match["is_central"], idx, comm=comm)
152+
psort.fetch_elements(catalogue_to_match["is_central"], idx, comm=comm)
139153
)
140154
host_halo_idx = psort.fetch_elements(
141-
soap_to_match["host_halo_idx"], idx[is_sat], comm=comm
155+
catalogue_to_match["host_halo_idx"], idx[is_sat], comm=comm
142156
)
143157
host_halo_catalogue_idx = psort.fetch_elements(
144-
soap_to_match["halo_catalogue_idx"], host_halo_idx, comm=comm
158+
catalogue_to_match["halo_catalogue_idx"], host_halo_idx, comm=comm
145159
)
146160
particle_halo_ids_to_match[is_sat] = host_halo_catalogue_idx
147161

@@ -241,19 +255,20 @@ def match_sim(
241255
matched_halo_ids = matched_halo_ids[idx]
242256
combined_counts = combined_counts[idx]
243257

244-
# Get the SOAP index of each object in matched_halo_ids
245-
matched_soap_idx = psort.parallel_match(
246-
matched_halo_ids, soap_to_match["halo_catalogue_idx"], comm=comm
258+
# Get the catalogue index of each object in matched_halo_ids
259+
matched_catalogue_idx = psort.parallel_match(
260+
matched_halo_ids, catalogue_to_match["halo_catalogue_idx"], comm=comm
247261
)
248262

249-
# Create arrays we will return. These have the same length as the arrays in `soap`
250-
match_index = -1 * np.ones_like(soap["halo_catalogue_idx"])
251-
match_count = np.zeros_like(soap["halo_catalogue_idx"])
263+
# Create arrays we will return. These have the same length as
264+
# the arrays in the input catalogue
265+
match_index = -1 * np.ones_like(catalogue["halo_catalogue_idx"])
266+
match_count = np.zeros_like(catalogue["halo_catalogue_idx"])
252267

253268
# Retrieve the values we require, skipping halos which don't have a match
254-
idx = psort.parallel_match(soap["halo_catalogue_idx"], halo_ids)
269+
idx = psort.parallel_match(catalogue["halo_catalogue_idx"], halo_ids)
255270
match_index[idx != -1] = psort.fetch_elements(
256-
matched_soap_idx, idx[idx != -1], comm=comm
271+
matched_catalogue_idx, idx[idx != -1], comm=comm
257272
)
258273
match_count[idx != -1] = psort.fetch_elements(
259274
combined_counts, idx[idx != -1], comm=comm
@@ -305,7 +320,11 @@ def mpi_print(string, comm_rank):
305320
if __name__ == "__main__":
306321

307322
parser = argparse.ArgumentParser(
308-
description=("Script to calculate extent of FoF groups.")
323+
description=(
324+
"Script to match halos across runs by comparing particles"
325+
"Default is to match SOAP catalgoues, but can also match"
326+
"FoF catalgoues"
327+
),
309328
)
310329
parser.add_argument(
311330
"--snap-basename1",
@@ -320,37 +339,37 @@ def mpi_print(string, comm_rank):
320339
"--snap-basename2",
321340
type=str,
322341
required=True,
323-
help=("The basename of the snapshot files for simulation 2"),
342+
help="The basename of the snapshot files for simulation 2",
324343
)
325344
parser.add_argument(
326345
"--membership-basename1",
327346
type=str,
328347
required=True,
329-
help=("The basename of the membership files for simulation 1"),
348+
help="The basename of the membership files for simulation 1",
330349
)
331350
parser.add_argument(
332351
"--membership-basename2",
333352
type=str,
334353
required=True,
335-
help=("The basename of the membership files for simulation 2"),
354+
help="The basename of the membership files for simulation 2",
336355
)
337356
parser.add_argument(
338-
"--soap-filename1",
357+
"--catalogue-filename1",
339358
type=str,
340359
required=True,
341-
help=("The filename of the SOAP catalogue for simulation 1"),
360+
help="The filename of the catalogue for simulation 1",
342361
)
343362
parser.add_argument(
344-
"--soap-filename2",
363+
"--catalogue-filename2",
345364
type=str,
346365
required=True,
347-
help=("The filename of the SOAP catalogue for simulation 2"),
366+
help="The filename of the catalogue for simulation 2",
348367
)
349368
parser.add_argument(
350369
"--output-filename",
351370
type=str,
352371
required=True,
353-
help=("The filename of the output file"),
372+
help="The filename of the output file",
354373
)
355374
parser.add_argument(
356375
"--ptypes",
@@ -365,33 +384,56 @@ def mpi_print(string, comm_rank):
365384
type=int,
366385
required=False,
367386
default=50,
368-
help="Number of particles to use when matching. Defaults to 50. -1 to use all particles",
387+
help=(
388+
"Number of particles to use when matching. Defaults to 50. "
389+
"Pass -1 to use all particles"
390+
),
369391
)
370392
parser.add_argument(
371-
"--centrals-only",
372-
type=bool,
373-
required=False,
374-
default=True,
375-
help="Only match central halos. Defaults to True",
393+
"--match-satellites",
394+
action="store_true",
395+
help="Attempt to match satellites subhalos as well as centrals",
376396
)
397+
parser.add_argument(
398+
"--match-fof",
399+
action="store_true",
400+
help=(
401+
"Whether to match FoF catalogues instead of SOAP catalogues. "
402+
"If using this option then pass the snapshots as both the "
403+
"--snap-basename and the --membership-basename. Pass the FoF "
404+
"catalogues as --catalogue-filename"
405+
),
406+
)
407+
377408
args = parser.parse_args()
378409

379410
# Log the arguments
380411
if comm_rank == 0:
381412
for k, v in vars(args).items():
382413
print(f" {k}: {v}")
383414

415+
if args.match_fof:
416+
assert not args.match_satellites
417+
384418
mpi_print("Loading data from simulation 1", comm_rank)
385419
data_1 = load_particle_data(
386-
args.snap_basename1, args.membership_basename1, args.ptypes, comm
420+
args.snap_basename1,
421+
args.membership_basename1,
422+
args.ptypes,
423+
args.match_fof,
424+
comm,
387425
)
388-
soap_1 = load_soap(args.soap_filename1, comm)
426+
catalogue_1 = load_catalogue(args.catalogue_filename1, args.match_fof, comm)
389427

390428
mpi_print("Loading data from simulation 2", comm_rank)
391429
data_2 = load_particle_data(
392-
args.snap_basename2, args.membership_basename2, args.ptypes, comm
430+
args.snap_basename2,
431+
args.membership_basename2,
432+
args.ptypes,
433+
args.match_fof,
434+
comm,
393435
)
394-
soap_2 = load_soap(args.soap_filename2, comm)
436+
catalogue_2 = load_catalogue(args.catalogue_filename2, args.match_fof, comm)
395437

396438
mpi_print("Removing particles which are not bound in both snapshots", comm_rank)
397439
idx = psort.parallel_match(
@@ -413,8 +455,8 @@ def mpi_print(string, comm_rank):
413455
data_1["rank_bound"],
414456
data_2["particle_ids"],
415457
data_2["halo_catalogue_idx"],
416-
soap_1,
417-
soap_2,
458+
catalogue_1,
459+
catalogue_2,
418460
)
419461

420462
mpi_print("Matching simulation 2 to simulation 1", comm_rank)
@@ -424,8 +466,8 @@ def mpi_print(string, comm_rank):
424466
data_2["rank_bound"],
425467
data_1["particle_ids"],
426468
data_1["halo_catalogue_idx"],
427-
soap_2,
428-
soap_1,
469+
catalogue_2,
470+
catalogue_1,
429471
)
430472

431473
mpi_print("Checking matches for consistency", comm_rank)
@@ -442,12 +484,13 @@ def mpi_print(string, comm_rank):
442484
("snap-basename2", args.snap_basename2),
443485
("membership-basename1", args.membership_basename1),
444486
("membership-basename2", args.membership_basename2),
445-
("soap-filename1", args.soap_filename1),
446-
("soap-filename2", args.soap_filename2),
487+
("catalogue-filename1", args.catalogue_filename1),
488+
("catalogue-filename2", args.catalogue_filename2),
447489
("output-filename", args.output_filename),
448490
("ptypes", args.ptypes),
449491
("nr-particles", args.nr_particles),
450-
("centrals-only", args.centrals_only),
492+
("match-satellites", args.match_satellites),
493+
("match-fof", args.match_fof),
451494
]:
452495
header.attrs[k] = v
453496
comm.barrier()

scripts/COLIBRE/match_fof.sh

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#!/bin/bash -l
2+
#
3+
# Match halos between different colibre runs with the
4+
# same box size and resolution. Pass the snapshots to
5+
# run when calling this script, e.g.
6+
#
7+
# cd SOAP
8+
# mkdir logs
9+
# sbatch -array=92,127%1 ./scripts/COLIBRE/match_colibre.sh
10+
#
11+
#SBATCH --nodes=1
12+
#SBATCH --cpus-per-task=1
13+
#SBATCH -J match_colibre
14+
#SBATCH -o ./logs/%x_%a.out
15+
#SBATCH -p cosma8
16+
#SBATCH -A dp004
17+
#SBATCH -t 1:00:00
18+
#
19+
20+
set -e
21+
22+
module purge
23+
module load python/3.12.4 gnu_comp/14.1.0 openmpi/5.0.3 parallel_hdf5/1.12.3
24+
source openmpi-5.0.3-hdf5-1.12.3-env/bin/activate
25+
26+
# Snapshot to do
27+
snapnum=`printf '%04d' ${SLURM_ARRAY_TASK_ID}`
28+
29+
# Where to put the output files
30+
outdir="/snap8/scratch/dp004/${USER}/COLIBRE/matching/"
31+
32+
# Sims to match
33+
sims=(
34+
"L0025N0188/Thermal L0025N0188/DMO"
35+
)
36+
for sim in "${sims[@]}"; do
37+
set -- $sim
38+
sim1=$1
39+
sim2=$2
40+
41+
# Location of the input files
42+
basedir="/cosma8/data/dp004/colibre/Runs"
43+
snap_basename1="${basedir}/${sim1}/snapshots/colibre_${snapnum}/colibre_${snapnum}"
44+
snap_basename2="${basedir}/${sim2}/snapshots/colibre_${snapnum}/colibre_${snapnum}"
45+
membership_basename1="${basedir}/${sim1}/snapshots/colibre_${snapnum}/colibre_${snapnum}"
46+
membership_basename2="${basedir}/${sim2}/snapshots/colibre_${snapnum}/colibre_${snapnum}"
47+
fof_filename1="${basedir}/${sim1}/fof/fof_output_${snapnum}/fof_output_${snapnum}.{file_nr}.hdf5"
48+
fof_filename2="${basedir}/${sim2}/fof/fof_output_${snapnum}/fof_output_${snapnum}.{file_nr}.hdf5"
49+
50+
# Matching parameters
51+
nr_particles=50
52+
53+
# Name of output file
54+
mkdir -p ${outdir}
55+
sim1=$(echo $sim1 | tr '/' '_')
56+
sim2=$(echo $sim2 | tr '/' '_')
57+
output_filename="${outdir}/match_fof_${sim1}_${sim2}_${snapnum}.${nr_particles}.hdf5"
58+
59+
echo
60+
echo Matching $sim1 to $sim2, snapshot ${snapnum}
61+
mpirun -- python -u misc/match_group_membership.py \
62+
--snap-basename1 ${snap_basename1}\
63+
--snap-basename2 ${snap_basename2}\
64+
--membership-basename1 ${membership_basename1}\
65+
--membership-basename2 ${membership_basename2}\
66+
--catalogue-filename1 ${fof_filename1} \
67+
--catalogue-filename2 ${fof_filename2} \
68+
--output-filename ${output_filename}\
69+
--nr-particles ${nr_particles} \
70+
--match-fof
71+
72+
done
73+
74+
echo "Job complete!"

0 commit comments

Comments
 (0)