diff --git a/python_files/postprocessing/5_create_dfs_per_core.py b/python_files/postprocessing/5_create_dfs_per_core.py
index e8b14c1..5669383 100644
--- a/python_files/postprocessing/5_create_dfs_per_core.py
+++ b/python_files/postprocessing/5_create_dfs_per_core.py
@@ -1042,6 +1042,132 @@
 deduped_distance_df_timepoint.to_csv(os.path.join(output_dir, 'distance_df_per_timepoint_deduped.csv'), index=False)
 
 
+# occupancy stat summary
+
+# Create list to hold parameters for each df that will be produced
+occupancy_df_params = [['cluster_broad_freq', 'cell_cluster_broad']]
+
+occupancy_dfs = []
+for result_name, cluster_col_name in occupancy_df_params:
+
+    # remove cluster_names except for the one specified for the df
+    drop_cols = ['cell_meta_cluster', 'cell_cluster', 'cell_cluster_broad', 'label']
+    drop_cols.remove(cluster_col_name)
+
+    # create df
+    occupancy_dfs.append(create_long_df_by_occupancy(cell_table=cell_table_occupancy,
+                                                     result_name=result_name,
+                                                     cluster_col_name=cluster_col_name,
+                                                     drop_cols=drop_cols,
+                                                     zscore_threshold=0.0))
+
+# create combined df
+total_df_occupancy = pd.concat(occupancy_dfs, axis=0)
+total_df_occupancy = total_df_occupancy.merge(harmonized_metadata, on='fov', how='inner')
+total_df_occupancy = total_df_occupancy.rename(columns={'functional_marker': 'occupancy_feature'})
+
+# save df
+total_df_occupancy.to_csv(os.path.join(output_dir, 'occupancy_df_per_core.csv'), index=False)
+
+# filter diversity scores to only include FOVs with at least the specified number of cells
+total_df = pd.read_csv(os.path.join(output_dir, 'cluster_df_per_core.csv'))
+min_cells = 5
+
+filtered_dfs = []
+metrics = [['cluster_broad_count', 'cluster_broad_freq'],
+           ['cluster_count', 'cluster_freq'],
+           ['meta_cluster_count', 'meta_cluster_freq']]
+for metric in metrics:
+    # subset count df to include cells at the relevant clustering resolution
+    for compartment in ['all']:
+        count_df = total_df[total_df.metric == metric[0]]
+        count_df = count_df[count_df.subset == compartment]
+
+        # subset diversity df to only include diversity metrics at this resolution
+        occupancy_df = total_df_occupancy[total_df_occupancy.metric == metric[1]]
+        occupancy_df = occupancy_df[occupancy_df.subset == compartment]
+
+        # for each cell type, determine which FOVs have high enough counts to be included
+        for cell_type in count_df.cell_type.unique():
+            keep_df = count_df[count_df.cell_type == cell_type]
+            keep_df = keep_df[keep_df.value >= min_cells]
+            keep_fovs = keep_df.fov.unique()
+
+            # subset morphology df to only include FOVs with high enough counts
+            keep_features = occupancy_df[occupancy_df.cell_type == cell_type]
+            keep_features = keep_features[keep_features.fov.isin(keep_fovs)]
+
+            # append to list of filtered dfs
+            filtered_dfs.append(keep_features)
+
+filtered_occupancy_df = pd.concat(filtered_dfs)
+
+# save filtered df
+filtered_occupancy_df.to_csv(os.path.join(output_dir, 'occupancy_df_per_core_filtered.csv'), index=False)
+
+# remove distances that are correlated with abundance of cell type
+
+if not os.path.exists(os.path.join(intermediate_dir, 'post_processing', 'occupancy_df_keep_features.csv')):
+    density_df = total_df.loc[(total_df.metric == 'cluster_broad_density') & (total_df.subset == 'all')]
+
+    # NOTE: this is purely for consistency, the table when created only includes the cluster_broad_freq metric and all compartments
+    filtered_occupancy_df = filtered_occupancy_df.loc[(filtered_occupancy_df.metric == 'cluster_broad_freq') & (filtered_occupancy_df.subset == 'all')]
+
+    # remove images without tumor cells
+    density_df = density_df.loc[density_df.Timepoint != 'lymphnode_neg', :]
+    filtered_occupancy_df = filtered_occupancy_df.loc[filtered_occupancy_df.fov.isin(density_df.fov.unique())]
+    cell_types = filtered_occupancy_df.cell_type.unique()
+
+    # calculate which pairings to keep
+    keep_cells, keep_features = [], []
+
+    for cell_type in cell_types:
+        density_subset = density_df.loc[density_df.cell_type == cell_type]
+        occupancy_subset = filtered_occupancy_df.loc[filtered_occupancy_df.linear_distance == cell_type]
+        occupancy_wide = occupancy_subset.pivot(index='fov', columns='cell_type', values='value')
+        occupancy_wide.reset_index(inplace=True)
+        occupancy_wide = pd.merge(occupancy_wide, density_subset[['fov', 'value']], on='fov', how='inner')
+
+        # get correlations
+        corr_df = occupancy_wide.corr(method='spearman')
+
+        # determine which features to keep
+        corr_vals = corr_df.loc['value', :].abs()
+        corr_vals = corr_vals[corr_vals < 0.7]
+
+        # add to list of features to keep
+        keep_cells.extend(corr_vals.index)
+        keep_features.extend([cell_type] * len(corr_vals.index))
+
+    keep_df = pd.DataFrame({'cell_type': keep_cells, 'feature_name': keep_features})
+
+    keep_df.to_csv(os.path.join(intermediate_dir, 'post_processing', 'occupancy_df_keep_features.csv'), index=False)
+else:
+    keep_df = pd.read_csv(os.path.join(intermediate_dir, 'post_processing', 'occupancy_df_keep_features.csv'))
+
+deduped_dfs = []
+for cell_type in keep_df.cell_type.unique():
+    keep_features = keep_df.loc[keep_df.cell_type == cell_type, 'feature_name'].unique()
+    if len(keep_features) > 0:
+        keep_df_subset = filtered_occupancy_df.loc[filtered_occupancy_df.cell_type == cell_type]
+        keep_df_subset = keep_df_subset.loc[keep_df_subset.linear_distance.isin(keep_features)]
+        deduped_dfs.append(keep_df_subset)
+
+deduped_occupancy_df = pd.concat(deduped_dfs)
+
+# save filtered df
+deduped_occupancy_df.to_csv(os.path.join(output_dir, 'occupancy_df_per_core_deduped.csv'), index=False)
+
+# create version aggregated by timepoint
+deduped_occupancy_df_grouped = deduped_occupancy_df.groupby(['Tissue_ID', 'cell_type', 'linear_distance', 'metric', 'subset'])
+deduped_occupancy_df_timepoint = deduped_occupancy_df_grouped['value'].agg([np.mean, np.std])
+deduped_occupancy_df_timepoint.reset_index(inplace=True)
+deduped_occupancy_df_timepoint = deduped_occupancy_df_timepoint.merge(harmonized_metadata.drop(['fov', 'MIBI_data_generated'], axis=1).drop_duplicates(), on='Tissue_ID')
+
+# save timepoint df
+deduped_occupancy_df_timepoint.to_csv(os.path.join(output_dir, 'occupancy_df_per_timepoint_deduped.csv'), index=False)
+
+
 # fiber analysis
 fiber_stats.columns = fiber_stats.columns.str.replace('avg_', '')
 fiber_stats.columns = fiber_stats.columns.str.replace('fiber_', '')
diff --git a/python_files/supplementary_plot_helpers.py b/python_files/supplementary_plot_helpers.py
index db6ca2a..bb7de7d 100644
--- a/python_files/supplementary_plot_helpers.py
+++ b/python_files/supplementary_plot_helpers.py
@@ -1,5 +1,7 @@
+import itertools
 import json
 import math
+import matplotlib.colors as mcolors
 import matplotlib.pyplot as plt
 from matplotlib.ticker import ScalarFormatter, MaxNLocator
 import natsort as ns
@@ -722,6 +724,386 @@ def stitch_before_after_norm(
     post_norm_tiled.save(post_norm_stitched_path)
 
 
+def occupancy_division_factor(occupancy_group: pd.api.typing.DataFrameGroupBy,
+                              tiles_per_row_col: int,
+                              max_image_size: int,
+                              cell_table: pd.DataFrame):
+    """Compute the denominator to ensure the proper percentage is computed for occupancy stats
+
+    Args:
+        occupancy_group (pd.api.typing.DataFrameGroupBy):
+            The group by object for the FOV and cell type
+        tiles_per_row_col (int):
+            The row/col dims of tiles to define over each image
+        max_image_size (int):
+            Maximum size of an image passed in, assuming square images
+            NOTE: all images should have an image size that is a factor of max_image_size
+        cell_table (pd.DataFrame):
+            The cell table associated with the cohort
+
+    Returns:
+        float:
+            The factor to divide the occupancy stat positive counts by
+    """
+    fov_name = occupancy_group["fov"].values[0]
+    image_size = cell_table.loc[cell_table["fov"] == fov_name, "fov_pixel_size"].values[0]
+    return tiles_per_row_col ** 2 / (max_image_size / image_size)
+
+
+# occupancy statistic helpers
+def compute_occupancy_statistics(
+    cell_table: pd.DataFrame, pop_col: str = "cell_cluster_broad",
+    pop_subset: Optional[List[str]] = None, tiles_per_row_col: int = 4,
+    max_image_size: int = 2048, positive_threshold: int = 0.0, sample_dir: Union[str, pathlib.Path]
+):
+    """Compute the occupancy statistics over a cohort, based on z-scored cell counts per tile 
+    across the cohort.
+
+    Args:
+        cell_table (pd.DataFrame):
+            The cell table associated with the cohort, need to contain all FOVs as well as 
+            a column indicating the size of the FOV.
+        pop_col (str):
+            Column containing the names of the cell populations
+        pop_subset (Optional[List[str]]):
+            Which populations, if any, to subset on. If None, use all populations
+        tiles_per_row_col (int):
+            The row/col dims of tiles to define over each image
+        max_image_size (int):
+            Maximum size of an image passed in, assuming square images
+            NOTE: all images should have an image size that is a factor of `max_image_size`
+        positive_threshold (int):
+            The z-scored cell count in a tile required for positivity
+
+    Returns:
+        Dict[str, float]:
+            A dictionary mapping each FOV to the percentage of tiles above the positive threshold 
+            for number of cells.
+    """
+    # get all the FOV names
+    fov_names: np.ndarray = cell_table["fov"].unique()
+
+    # if a population subset is specified, first validate then truncate the cell table accordingly
+    if pop_subset:
+        verify_in_list(
+            specified_populations=pop_subset,
+            valid_populations=cell_table[pop_col].unique()
+        )
+    # otherwise, use all possible subsets as defined by pop_col
+    else:
+        pop_subset = cell_table[pop_col].unique()
+
+    # define the tile size in pixels for each FOV
+    cell_table["tile_size"] = max_image_size // tiles_per_row_col
+
+    # define the tile for each cell
+    cell_table["tile_row"] = (cell_table["centroid-1"] // cell_table["tile_size"]).astype(int)
+    cell_table["tile_col"] = (cell_table["centroid-0"] // cell_table["tile_size"]).astype(int)
+
+    # Create a DataFrame with all combinations of fov, tile_row, tile_col, and pop_col
+    all_combos = pd.MultiIndex.from_product(
+        [
+            cell_table["fov"].unique(),
+            cell_table["tile_row"].unique(),
+            cell_table["tile_col"].unique(),
+            cell_table[pop_col].unique()
+        ],
+        names=["fov", "tile_row", "tile_col", pop_col]
+    ).to_frame(index=False)
+
+    # compute the total occupancy of each tile for each population type
+    # NOTE: need to merge this way to account for tiles with 0 counts
+    occupancy_counts: pd.DataFrame = cell_table.groupby(
+        ["fov", "tile_row", "tile_col", pop_col]
+    ).size().reset_index(name="occupancy")
+    occupancy_counts = pd.merge(
+        all_combos, occupancy_counts,
+        on=["fov", "tile_row", "tile_col", pop_col],
+        how="left"
+    )
+    occupancy_counts["occupancy"].fillna(0, inplace=True)
+    # print(occupancy_counts[["tile_row", "tile_col"]].drop_duplicates())
+
+    # zscore the tile counts across the cohort grouped by the cell types
+    occupancy_counts["zscore_occupancy"] = occupancy_stats.groupby(
+        [pop_col]
+    )["occupancy"].transform(lambda x: zscore(x, ddof=0))
+
+    # mark tiles as positive depending on the positive_threshold value
+    occupancy_counts["is_positive"] = occupancy_counts["zscore_occupancy"] > positive_threshold
+
+    # total up the positive tiles per FOV and cell type
+    occupancy_counts_grouped: pd.DataFrame = occupancy_counts.groupby(["fov", pop_col]).apply(
+        lambda row: row["is_positive"].sum()
+    ).reset_ondex("total_positive_tiles")
+
+    # occupancy_counts_grouped: pd.DataFrame = occupancy_counts.groupby(["fov", pop_col]).apply(
+    #     lambda row: row["is_positive"].sum() / occupancy_division_factor(
+    #         row, tiles_per_row_col, max_image_size, cell_table
+    #     ) * 100
+    # ).reset_index(name="percent_positive")
+    occupancy_counts_grouped: pd.DataFrame = occupancy_counts.groupby(["fov", pop_col]).apply(
+        lambda row: row["is_positive"].sum()
+    ).reset_index(name="total_positive_tiles")
+
+    # get the max image size, this will determine how many tiles there are when finding percentages
+    occupancy_counts_grouped["image_size"] = occupancy_counts_grouped.apply(
+        lambda row: io.imread(os.path.join(samples_dir, row["fov"], "Calprotectin.tiff")).shape[0],
+        axis=1
+    )
+    occupancy_stats_grouped["max_tiles"] = occupancy_stats_grouped.apply(
+        lambda row: tiles_per_row_col ** 2 ** (1 / (max_image_size / row["image_size"])),
+        axis=1
+    )
+
+    # determine the percent positivity of tiles
+    occupancy_counts_grouped["percent_positive_tiles"] = \
+        occupancy_stats_grouped["total_positive_tiles"] / occupancy_stats_grouped["max_tiles"]
+
+    # occupancy_stats_grouped: pd.DataFrame = occupancy_stats.groupby(
+    #     ["fov", "cell_cluster_broad"]
+    # ).apply(lambda row: row["is_positive"].sum()).reset_index(name="total_positive_tiles")
+
+    # # everything after here is grouped
+    # occupancy_stats_grouped["num_cells"] = occupancy_stats.groupby(
+    #     ["fov", "cell_cluster_broad", "positivity_threshold"]
+    # ).apply(lambda row: row["occupancy"].sum()).values
+
+    # occuapcny_stats_grouped["max_tiles"] = occupancy_count
+
+    return occupancy_counts_grouped
+
+    # return occupancy_counts, occupancy_counts_grouped
+
+    # # define the occupancy_stats array
+    # occupancy_stats = xr.DataArray(
+    #     np.zeros((len(fov_names), tiles_per_row_col, tiles_per_row_col), dtype=np.int16),
+    #     coords=[fov_names, np.arange(tiles_per_row_col), np.arange(tiles_per_row_col)],
+    #     dims=["fov", "x", "y"]
+    # )
+
+    # # iterate over each population
+    # for pop in pop_subset:
+    #     cell_table_pop = cell_table[cell_table[pop_col] == pop].copy()
+
+    #     # Group by FOV and tile positions, then count occupancy
+    #     occupancy_counts: pd.DataFrame = cell_table_pop.groupby(
+    #         ["fov", "tile_row", "tile_col"]
+    #     ).size().reset_index(name="occupancy")
+
+    #     # define the occupancy_stats array
+    #     occupancy_stats = xr.DataArray(
+    #         np.zeros((len(fov_names), tiles_per_row_col, tiles_per_row_col), dtype=np.int16),
+    #         coords=[fov_names, np.arange(tiles_per_row_col), np.arange(tiles_per_row_col)],
+    #         dims=["fov", "x", "y"]
+    #     )
+
+    #     # Update the DataArray based on occupancy_counts without explicit Python loops
+    #     for index, row in occupancy_counts.iterrows():
+    #         occupancy_stats.loc[row["fov"], row["tile_row"], row["tile_col"]] = row["occupancy"]
+
+    #     # define the tiles that are positive based on threshold
+    #     occupancy_stats_positivity: xr.DataArray = occupancy_stats > positive_threshold
+
+    #     # compute the percentage of positive tiles for each FOV
+    #     occupancy_stats_sum: xr.DataArray = occupancy_stats_positivity.sum(
+    #         dim=['x', 'y']
+    #     ) / (tiles_per_row_col ** 2)
+
+    #     # convert to dict
+    #     occupancy_stats_dict: Dict[str, float] = occupancy_stats_sum.to_series().to_dict()
+    #     occupancy_stats_dict = {fov: percentage for fov, percentage in occupancy_stats_dict.items()}
+
+    #     return occupancy_stats_dict
+
+
+def visualize_occupancy_statistics(
+    occupancy_stats_table: pd.DataFrame, save_dir: Union[str, pathlib.Path],
+    pop_col: str = "cell_cluster_broad", pop_subset: Optional[List[str]] = None,
+    figsize: Optional[Tuple[float, float]] = (10, 10)
+):
+    """Visualize the distribution of the percentage of tiles above a positive threshold.
+
+    Data needs to be generated by compute_occupancy_statistics.
+
+    Args:
+        occupancy_stats_table (pd.DataFrame):
+            The table generated by compute_occupancy_statistics, lists the percentage of positive 
+            tiles for each image of the cohort at different positivity thresholds and grid sizes
+        save_dir (Union[str, pathlib.Path]):
+            Directory to save the visualizations in
+        pop_col (str):
+            Column containing the names of the cell populations
+        pop_subset (Optional[List[str]]):
+            Which populations, if any, to subset on. If None, use all populations
+        fig_size (Optional[Tuple[float, float]]):
+            The figure size to use for the image.
+    """
+    if pop_subset is not None:
+        verify_in_list(
+            specified_populations=pop_subset,
+            valid_populations=occupancy_stats_table[pop_col].unique()
+        )
+    else:
+        pop_subset = occupancy_stats_table[pop_col].unique()
+
+    for pop in pop_subset:
+        occupancy_stats_table_sub = occupancy_stats_table[occupancy_stats_table[pop_col] == pop]
+
+        # generate each unique test pair
+        tiles_threshold_trials = occupancy_stats_table_sub[
+            ["num_tiles", "positive_threshold"]
+        ].drop_duplicates().reset_index(drop=True)
+
+        # define a separate plot on the same grid for each test pair
+        fig, axs = plt.subplots(
+            tiles_threshold_trials.shape[0], 1, figsize=figsize
+        )
+        populations_str = f"{pop} cell populations"
+        # fig.suptitle(f"Percentage positive tile distributions for {populations_str}", fontsize=24)
+        # fig.subplots_adjust(top=1.50)
+
+        # define an index to iterate through the subplots
+        subplot_i = 0
+
+        for _, trial in tiles_threshold_trials.iterrows():
+            grid_size = int(np.sqrt(trial["num_tiles"]))
+            positive_threshold = trial["positive_threshold"]
+
+            # subset data obtained just for the specified trial
+            trial_data = occupancy_stats_table_sub[
+                (occupancy_stats_table_sub["num_tiles"] == trial["num_tiles"]) &
+                (occupancy_stats_table_sub["positive_threshold"] == trial["positive_threshold"])
+            ]
+
+            # visualize the distribution using a histogram
+            positive_tile_percentages = trial_data["percent_positive"].values
+            axs[subplot_i].hist(
+                positive_tile_percentages,
+                facecolor="g",
+                bins=20,
+                alpha=0.75
+            )
+            axs[subplot_i].set_title(
+                f"Grid size: {grid_size}x{grid_size}, Positive threshold: {positive_threshold}"
+            )
+
+            # update the subplot index
+            subplot_i += 1
+
+        plt.tight_layout()
+
+        # save the figure to save_dir
+        fig.savefig(
+            pathlib.Path(save_dir) / f"occupancy_statistic_distributions_{pop}.png",
+            dpi=300
+        )
+
+
+def visualize_occupancy_statistics_old(
+    cell_table: pd.DataFrame, save_dir: Union[str, pathlib.Path], max_image_size: int = 2048,
+    tiles_per_row_col: int = 8, positive_threshold: int = 20
+):
+    """Define occupancy statistics over a cohort, and visualize distribution of positive
+    tile counts across FOV.
+
+    Args:
+        cell_table (pd.DataFrame):
+            The cell table associated with the cohort, need to contain all FOVs
+        save_dir (Union[str, pathlib.Path]):
+            Directory to save the visualizations in
+        max_image_size (int):
+            The max image size to load in
+        tiles_per_row_col (int):
+            The row/col dims of tiles to define over each image
+        positive_threshold (int):
+            The cell count in a tile required for positivity
+    """
+    # get all the FOV names
+    fov_names: np.ndarray = cell_table["fov"].unique()
+
+    # define the tile size in pixels
+    tile_size: int = max_image_size // tiles_per_row_col
+
+    cell_table["tile_row"] = (cell_table["centroid-1"] // tile_size).astype(int)
+    cell_table["tile_col"] = (cell_table["centroid-0"] // tile_size).astype(int)
+
+    # Group by FOV and tile positions, then count occupancy
+    occupancy_counts: pd.DataFrame = cell_table.groupby(
+        ["fov", "tile_row", "tile_col"]
+    ).size().reset_index(name="occupancy")
+
+    # define the occupancy_stats array
+    occupancy_stats = xr.DataArray(
+        np.zeros((len(fov_names), tiles_per_row_col, tiles_per_row_col), dtype=np.int16),
+        coords=[fov_names, np.arange(tiles_per_row_col), np.arange(tiles_per_row_col)],
+        dims=["fov", "x", "y"]
+    )
+
+    # Update the DataArray based on occupancy_counts without explicit Python loops
+    for index, row in occupancy_counts.iterrows():
+        occupancy_stats.loc[row["fov"], row["tile_row"], row["tile_col"]] = row["occupancy"]
+
+    # define the tiles that are positive based on threshold
+    occupancy_stats_positivity: xr.DataArray = occupancy_stats > positive_threshold
+
+    # count number of positive tiles per FOV
+    fov_positive_tiles: xr.DataArray = occupancy_stats_positivity.sum(dim=["x", "y"])
+    fov_positive_tile_counts: Dict[str, int] = dict(
+        zip(fov_positive_tiles.fov.values, fov_positive_tiles.values)
+    )
+
+    # 1. visualize histogram of positive tile counts across cohort
+    fig, axs = plt.subplots(
+        3,
+        1,
+        figsize=(10, 10)
+    )
+    print(list(fov_positive_tile_counts.values()))
+    axs[0].hist(list(fov_positive_tile_counts.values()))
+    axs[0].set_title("Distribution of tile positivity")
+
+    # 2. visualize heatmap of average positivity of tiles across FOVs
+    cmap_positivity = mcolors.LinearSegmentedColormap.from_list("", ["white", "red"])
+    c_positivity = axs[1].imshow(
+        occupancy_stats_positivity.mean(dim="fov").values,
+        cmap=cmap_positivity,
+        aspect="auto",
+        vmin=0,
+        vmax=1
+    )
+    axs[1].set_xticks([])
+    axs[1].set_yticks([])
+    axs[1].set_title("Mean positivity per tile")
+    fig.colorbar(c_positivity, ax=axs[1], ticks=[0, 0.25, 0.5, 0.75, 1])
+
+    # 3. visualize heatmap of average number of counts across FOVs
+    cmap_counts = mcolors.LinearSegmentedColormap.from_list("", ["white", "red"])
+    max_occupancy_stat = np.round(np.max(occupancy_stats.mean(dim="fov").values), 2)
+    c_counts = axs[2].imshow(
+        occupancy_stats.mean(dim="fov").values,
+        cmap=cmap_counts,
+        aspect="auto",
+        vmin=0,
+        vmax=max_occupancy_stat
+    )
+    axs[2].set_xticks([])
+    axs[2].set_yticks([])
+    axs[2].set_title("Mean num cells per tile")
+    fig.colorbar(c_counts, ax=axs[2], ticks=np.round(np.linspace(0, max_occupancy_stat, 4), 2))
+
+    fig.suptitle(
+        f"Stats for # tiles: {tiles_per_row_col}, positive threshold: {positive_threshold}",
+        fontsize=24
+    )
+
+    # save the figure to save_dir
+    fig.savefig(
+        pathlib.Path(save_dir) / f"occupancy_stats_{tiles_per_row_col}_{positive_threshold}.png",
+        dpi=300
+    )
+
+
 def run_functional_marker_positivity_tuning_tests(
     cell_table: pd.DataFrame, save_dir: Union[str, pathlib.Path],
     marker_info: Dict[str, MarkerDict], threshold_mults: List[float]
diff --git a/python_files/utils.py b/python_files/utils.py
index cdd4192..c9e323a 100644
--- a/python_files/utils.py
+++ b/python_files/utils.py
@@ -11,6 +11,7 @@
 import pandas as pd
 from skimage import morphology
 from scipy.ndimage import gaussian_filter
+from scipy.stats import zscore
 from skimage.segmentation import find_boundaries
 from skimage.measure import label
 import skimage.io as io
@@ -229,6 +230,155 @@ def create_long_df_by_functional(func_table, cluster_col_name, drop_cols, result
     return long_df_all
 
 
+def occupancy_df_helper(cell_table, cluster_col_name, drop_cols, result_name, subset_col=None,
+                        tiles_per_row_col=4, max_image_size=2048, positive_threshold=0):
+    """Helper function for creating the unmelted occupancy table
+
+    Args:
+        cell_table (pd.DataFrame): the dataframe containing information on each cell
+        cluster_col_name (str): the column name in cell_table that contains the cluster information
+        drop_cols (list): list of columns to drop from cell_table
+        result_name (str): the name of this statistic in the returned df
+        subset_col (str): the name of the subset col to group by, if not provided then skip
+        tiles_per_row_col (int): the row/col dims of tiles to define over each image
+        max_image_size (int): maximum size of an image passed in, assuming square images
+        positive_threshold (int): the z-scored cell count in a tile required for positivity
+
+    Returns:
+        pd.DataFrame: occupancy stats in unmelted format"""
+
+    verify_in_list(cell_type_col=cluster_col_name, cell_table_columns=cell_table.columns)
+    verify_in_list(drop_cols=drop_cols, cell_table_columns=cell_table.columns)
+    if subset_col is not None:
+        verify_in_list(subset_col=subset_col, cell_table_columns=cell_table.columns)
+
+    # remove the drop columns
+    cell_table_small = cell_table.loc[:, ~cell_table.columns.isin(drop_cols)]
+
+    # get all the FOV names
+    fov_names = cell_table_small["fov"].unique()
+
+    # define the tile size in pixels for each FOV
+    tile_size = max_image_size // tiles_per_row_col
+
+    # define the tile for each cell
+    cell_table_small["tile_row"] = (cell_table_small["centroid-1"] // tile_size).astype(int)
+    cell_table_small["tile_col"] = (cell_table_small["centroid-0"] // tile_size).astype(int)
+
+    # Create a DataFrame with all combinations of fov, tile_row, tile_col, and pop_col
+    all_combos: pd.DataFrame = pd.MultiIndex.from_product(
+        [
+            cell_table_small["fov"].unique(),
+            cell_table_small["tile_row"].unique(),
+            cell_table_small["tile_col"].unique(),
+            cell_table_small[cluster_col_name].unique()
+        ],
+        names=["fov", "tile_row", "tile_col", cluster_col_name]
+    ).to_frame(index=False)
+
+    occupancy_count_groupby = ["fov", "tile_row", "tile_col", cluster_col_name]
+    if subset_col:
+        occupancy_group_by.append("subset_col")
+
+    # compute the total occupancy of each tile for each population type
+    # NOTE: need to merge this way to account for tiles with 0 counts
+    occupancy_counts = cell_table_small.groupby(
+        occupancy_count_groupby
+    ).size().reset_index(name="occupancy")
+    occupancy_counts = pd.merge(
+        all_combos, occupancy_counts,
+        on=occupancy_count_groupby,
+        how="left"
+    )
+    occupancy_counts["occupancy"].fillna(0, inplace=True)
+    # print(occupancy_counts[["tile_row", "tile_col"]].drop_duplicates())
+
+    # zscore the tile counts across the cohort grouped by the cell types
+    occupancy_counts["zscore_occupancy"] = occupancy_counts.groupby(
+        [cluster_col_name]
+    )["occupancy"].transform(lambda x: zscore(x, ddof=0))
+
+    # mark tiles as positive depending on the positive_threshold value
+    occupancy_counts["is_positive"] = occupancy_counts["zscore_occupancy"] > positive_threshold
+
+    # total up the positive tiles per FOV and cell type
+    occupancy_zscore_groupby = ["fov", cluster_col_name]
+    if subset_col is not None:
+        occupancy_zscore_groupby.append(subset_col)
+    occupancy_counts_grouped: pd.DataFrame = occupancy_counts.groupby(occupancy_zscore_groupby).apply(
+        lambda row: row["is_positive"].sum()
+    ).reset_index(name="total_positive_tiles")
+
+    # get the max image size, this will determine how many tiles there are when finding percentages
+    occupancy_counts_grouped["image_size"] = occupancy_counts_grouped.apply(
+        lambda row: io.imread(os.path.join(samples_dir, row["fov"], "Calprotectin.tiff")).shape[0],
+        axis=1
+    )
+    occupancy_stats_grouped["max_tiles"] = occupancy_stats_grouped.apply(
+        lambda row: tiles_per_row_col ** 2 ** (1 / (max_image_size / row["image_size"])),
+        axis=1
+    )
+
+    # determine the percent positivity of tiles
+    occupancy_counts_grouped["percent_positive_tiles"] = \
+        occupancy_stats_grouped["total_positive_tiles"] / occupancy_stats_grouped["max_tiles"]
+
+    # reshape to long df
+    occupancy_long_df = pd.melt(
+        occupancy_counts_grouped[occupancy_zscore_groupby + ["percent_positive_tiles"]],
+        id_vars=occupancy_zscore_groupby,
+        var_name='functional_marker'
+    )
+    occupancy_long_df['metric'] = result_name
+    occupancy_long_df = long_df.rename(columns={cluster_col_name: "cell_type"})
+    if subset_col is not None:
+        occupancy_long_df.rename(columns={subset_col: "subset"})
+
+    return occupancy_long_df
+
+
+def create_long_df_by_occupancy(cell_table, cluster_col_name, drop_cols, result_name,
+                                subset_col=None, tiles_per_row_col=4, max_image_size=2048,
+                                positive_threshold=0):
+    """Compute the occupancy statistics over a cohort, based on z-scored cell counts per tile 
+    across the cohort.
+
+    Args:
+        cell_table (pd.DataFrame): the dataframe containing information on each cell
+        cluster_col_name (str): the column name in cell_table that contains the cluster information
+        drop_cols (list): list of columns to drop from cell_table
+        result_name (str): the name of this statistic in the returned df
+        subset_col (str): the column name in cell_table to subset by
+        tiles_per_row_col (int): the row/col dims of tiles to define over each image
+        max_image_size (int): maximum size of an image passed in, assuming square images
+        positive_threshold (int): the z-scored cell count in a tile required for positivity
+
+    Returns:
+        pd.DataFrame: long format dataframe containing the summarized occupancy data"""
+
+    # first generate df without subsetting
+    drop_cols_all = drop_cols.copy()
+    if subset_col is not None:
+        drop_cols_all = drop_cols + [subset_col]
+
+    occupancy_long_all_compartments = occupancy_df_helper(
+        cell_table, cluster_col_name, drop_cols, result_name, None,
+        tiles_per_row_col, max_image_size, positive_threshold
+    )
+    long_df_all["subset"] = "all"
+
+    if subset_col is not None:
+        occupancy_long_per_compartment = occupancy_df_helper(
+            cell_table, cluster_col_name, drop_cols, result_name, subset_col,
+            tiles_per_row_col, max_image_size, positive_threshold
+        )
+        occupancy_long_comb = pd.concat(
+            [occupancy_long_all_compartments, occupancy_long_per_compartment]
+        )
+
+    return occupancy_long_comb
+
+
 def create_channel_mask(img, intensity_thresh, sigma, min_mask_size=0, max_hole_size=100000):
     """Generates a binary mask from a single channel image