From 8218a3ae6a4ef8954f030eac7e406c7350b564ef Mon Sep 17 00:00:00 2001 From: AngRodrigues Date: Tue, 3 Dec 2024 16:46:25 +1100 Subject: [PATCH 1/5] feat: do not accept any legacy files --- map2loop/config.py | 64 +----------------------- map2loop/mapdata.py | 2 +- map2loop/project.py | 25 +++++----- map2loop/sorter.py | 4 +- map2loop/utils.py | 117 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 132 insertions(+), 80 deletions(-) diff --git a/map2loop/config.py b/map2loop/config.py index d4811d97..7a19ddfa 100644 --- a/map2loop/config.py +++ b/map2loop/config.py @@ -135,65 +135,6 @@ def update_from_dictionary(self, dictionary: dict, lower: bool = False): if len(dictionary): logger.warning(f"Unused keys from config format {list(dictionary.keys())}") - @beartype.beartype - def update_from_legacy_file(self, file_map: dict, lower: bool = False): - """ - Update the config dictionary from the provided old version dictionary - - Args: - file_map (dict): The old version dictionary to update from - """ - - code_mapping = { - "otype": (self.structure_config, "orientation_type"), - "dd": (self.structure_config, "dipdir_column"), - "d": (self.structure_config, "dip_column"), - "sf": (self.structure_config, "description_column"), - "bedding": (self.structure_config, "bedding_text"), - "bo": (self.structure_config, "overturned_column"), - "btype": (self.structure_config, "overturned_text"), - "gi": (self.structure_config, "objectid_column"), - "c": (self.geology_config, "unitname_column"), - "u": (self.geology_config, "alt_unitname_column"), - "g": (self.geology_config, "group_column"), - "g2": (self.geology_config, "supergroup_column"), - "ds": (self.geology_config, "description_column"), - "min": (self.geology_config, "minage_column"), - "max": (self.geology_config, "maxage_column"), - "r1": (self.geology_config, "rocktype_column"), - "r2": (self.geology_config, "alt_rocktype_column"), - "sill": (self.geology_config, "sill_text"), - "intrusive": (self.geology_config, "intrusive_text"), - "volcanic": (self.geology_config, "volcanic_text"), - "f": (self.fault_config, "structtype_column"), - "fault": (self.fault_config, "fault_text"), - "fdipnull": (self.fault_config, "dip_null_value"), - "fdipdip_flag": (self.fault_config, "dipdir_flag"), - "fdipdir": (self.fault_config, "dipdir_column"), - "fdip": (self.fault_config, "dip_column"), - "fdipest": (self.fault_config, "dipestimate_column"), - "fdipest_vals": (self.fault_config, "dipestimate_text"), - "n": (self.fault_config, "name_column"), - "ff": (self.fold_config, "structtype_column"), - "fold": (self.fold_config, "fold_text"), - "t": (self.fold_config, "description_column"), - "syn": (self.fold_config, "synform_text"), - } - for code in code_mapping: - if code in file_map: - if lower is True: - file_map[code] = str(file_map[code]).lower() - code_mapping[code][0][code_mapping[code][1]] = file_map[code] - file_map.pop(code) - - if "o" in file_map: - self.structure_config["objectid_column"] = file_map["o"] - self.fault_config["objectid_column"] = file_map["o"] - self.fold_config["objectid_column"] = file_map["o"] - file_map.pop("o") - - if len(file_map) > 0: - logger.warning(f"Unused keys from legacy format {list(file_map.keys())}") @beartype.beartype def update_from_file( @@ -207,10 +148,7 @@ def update_from_file( legacy_format (bool, optional): Whether the JSON is an old version. Defaults to False. lower (bool, optional): convert keys to lowercase. Defaults to False. """ - if legacy_format: - func = self.update_from_legacy_file - else: - func = self.update_from_dictionary + func = self.update_from_dictionary try: filename = str(filename) diff --git a/map2loop/mapdata.py b/map2loop/mapdata.py index 83584587..e7d258d2 100644 --- a/map2loop/mapdata.py +++ b/map2loop/mapdata.py @@ -269,7 +269,7 @@ def set_config_filename( Whether the file is in m2lv2 form. Defaults to False. """ logger.info('Setting config filename to {filename}') - self.config_filename = filename + self.config.update_from_file(filename, legacy_format=legacy_format, lower=lower) logger.info(f"Config is: {self.config.to_dict()}") diff --git a/map2loop/project.py b/map2loop/project.py index 54926372..71b95c3a 100644 --- a/map2loop/project.py +++ b/map2loop/project.py @@ -70,7 +70,7 @@ def __init__( config_filename: Union[pathlib.Path, str] = "", config_dictionary: dict = {}, clut_filename: Union[pathlib.Path, str] = "", - clut_file_legacy: bool = False, + # clut_file_legacy: bool = False, save_pre_checked_map_data: bool = False, loop_project_filename: str = "", overwrite_loopprojectfile: bool = False, @@ -106,10 +106,8 @@ def __init__( The filename of the configuration json file to use (if not using config_dictionary). Defaults to "". config_dictionary (dict, optional): A dictionary version of the configuration file. Defaults to {}. - clut_filename (str, optional): + clut_filename (str, deprecated): The filename of the colour look up table to use. Defaults to "". - clut_file_legacy (bool, optional): - A flag to indicate if the clut file is in the legacy format. Defaults to False. save_pre_checked_map_data (bool, optional): A flag to save all map data to file before use. Defaults to False. loop_project_filename (str, optional): @@ -146,6 +144,11 @@ def __init__( self.fold_samples = pandas.DataFrame(columns=["ID", "X", "Y", "Z", "featureId"]) self.geology_samples = pandas.DataFrame(columns=["ID", "X", "Y", "Z", "featureId"]) + # check if user is using a config file or dictionary, if file, break the project. + if config_filename != "": + logger.error("Config legacy files have been deprecated in v3.2. Please use a dictionary instead.") + raise ValueError("Config legacy files have been deprecated in v3.2. Please use a dictionary instead.You can use the utils function update_from_legacy_file") + # Check for alternate config filenames in kwargs if "metadata_filename" in kwargs and config_filename == "": config_filename = kwargs["metadata_filename"] @@ -203,18 +206,12 @@ def __init__( if fault_orientation_filename != "": self.map_data.set_filename(Datatype.FAULT_ORIENTATION, fault_orientation_filename) - if config_filename != "": - if clut_file_legacy: - logger.warning( - "DEPRECATION: Legacy files are deprecated and their use will be removed in v3.2" - ) - - self.map_data.set_config_filename(config_filename, legacy_format=clut_file_legacy) - - if config_dictionary != {}: - self.map_data.config.update_from_dictionary(config_dictionary) if clut_filename != "": self.map_data.set_colour_filename(clut_filename) + + #set config dict + self.map_data.config.update_from_dictionary(config_dictionary) + # Load all data (both shape and raster) self.map_data.load_all_map_data() diff --git a/map2loop/sorter.py b/map2loop/sorter.py index cf444de6..42da4617 100644 --- a/map2loop/sorter.py +++ b/map2loop/sorter.py @@ -118,8 +118,8 @@ def sort( class SorterUseHint(SorterUseNetworkX): def __init__(self): - print( - "SorterUseHint is deprecated and will be removed in map2loop v3.2. Use SorterUseNetworkX instead" + logger.info( + "SorterUseHint is deprecated in v3.2. Use SorterUseNetworkX instead" ) super().__init__() diff --git a/map2loop/utils.py b/map2loop/utils.py index e00c0f91..a94f29d4 100644 --- a/map2loop/utils.py +++ b/map2loop/utils.py @@ -5,6 +5,11 @@ import beartype from typing import Union, Optional, Dict import pandas +import re +import json + +from .logging import getLogger +logger = getLogger(__name__) @beartype.beartype @@ -401,3 +406,115 @@ def calculate_minimum_fault_length( # Return the square root of the threshold area as the minimum fault length return threshold_area**0.5 + + +def preprocess_hjson_to_json(hjson_content): + # Remove comments + hjson_content = re.sub(r'#.*', '', hjson_content) + hjson_content = re.sub(r'//.*', '', hjson_content) + # Replace single quotes with double quotes + hjson_content = re.sub(r"(? dict: + try: + # Read the file + with open(file_path, "r", encoding="utf-8") as file: + hjson_content = file.read() + if not hjson_content.strip(): + raise ValueError("The HJSON file is empty.") + # Preprocess HJSON to JSON + preprocessed_content = preprocess_hjson_to_json(hjson_content) + # Parse JSON + return json.loads(preprocessed_content) + except FileNotFoundError as e: + raise FileNotFoundError(f"HJSON file not found: {file_path}") from e + except json.JSONDecodeError as e: + raise ValueError(f"Failed to decode preprocessed HJSON as JSON: {e}") from e + + +def update_from_legacy_file( + filename: str, + json_save_path: Optional[str] = None, + lower: bool = False +) -> Optional[Dict[str, Dict]]: + """ + Update the config dictionary from the provided old version dictionary + Args: + file_map (dict): The old version dictionary to update from + """ + # only import config if needed + from .config import Config + file_map = Config() + + code_mapping = { + "otype": (file_map.structure_config, "orientation_type"), + "dd": (file_map.structure_config, "dipdir_column"), + "d": (file_map.structure_config, "dip_column"), + "sf": (file_map.structure_config, "description_column"), + "bedding": (file_map.structure_config, "bedding_text"), + "bo": (file_map.structure_config, "overturned_column"), + "btype": (file_map.structure_config, "overturned_text"), + "gi": (file_map.structure_config, "objectid_column"), + "c": (file_map.geology_config, "unitname_column"), + "u": (file_map.geology_config, "alt_unitname_column"), + "g": (file_map.geology_config, "group_column"), + "g2": (file_map.geology_config, "supergroup_column"), + "ds": (file_map.geology_config, "description_column"), + "min": (file_map.geology_config, "minage_column"), + "max": (file_map.geology_config, "maxage_column"), + "r1": (file_map.geology_config, "rocktype_column"), + "r2": (file_map.geology_config, "alt_rocktype_column"), + "sill": (file_map.geology_config, "sill_text"), + "intrusive": (file_map.geology_config, "intrusive_text"), + "volcanic": (file_map.geology_config, "volcanic_text"), + "f": (file_map.fault_config, "structtype_column"), + "fault": (file_map.fault_config, "fault_text"), + "fdipnull": (file_map.fault_config, "dip_null_value"), + "fdipdip_flag": (file_map.fault_config, "dipdir_flag"), + "fdipdir": (file_map.fault_config, "dipdir_column"), + "fdip": (file_map.fault_config, "dip_column"), + "fdipest": (file_map.fault_config, "dipestimate_column"), + "fdipest_vals": (file_map.fault_config, "dipestimate_text"), + "n": (file_map.fault_config, "name_column"), + "ff": (file_map.fold_config, "structtype_column"), + "fold": (file_map.fold_config, "fold_text"), + "t": (file_map.fold_config, "description_column"), + "syn": (file_map.fold_config, "synform_text"), + } + # try and ready the file: + try: + parsed_data = read_hjson_with_json(filename) + except Exception as e: + logger.error(f"Error reading file {filename}: {e}") + return + #map the keys + file_map = file_map.to_dict() + for legacy_key, new_mapping in code_mapping.items(): + if legacy_key in parsed_data: + section, new_key = new_mapping + value = parsed_data[legacy_key] + if lower and isinstance(value, str): + value = value.lower() + section[new_key] = value + + if "o" in parsed_data: + object_id_value = parsed_data["o"] + if lower and isinstance(object_id_value, str): + object_id_value = object_id_value.lower() + file_map['structure']["objectid_column"] = object_id_value + file_map['geology']["objectid_column"] = object_id_value + file_map['fold']["objectid_column"] = object_id_value + + if json_save_path is not None: + with open(json_save_path, "w") as f: + json.dump(parsed_data, f, indent=4) + + return file_map \ No newline at end of file From 8d926ef6a9bbefc43a4797b1167d2d0560192889 Mon Sep 17 00:00:00 2001 From: AngRodrigues Date: Wed, 4 Dec 2024 09:58:29 +1100 Subject: [PATCH 2/5] chore: small typo --- map2loop/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/map2loop/project.py b/map2loop/project.py index 8302c9cb..027bd019 100644 --- a/map2loop/project.py +++ b/map2loop/project.py @@ -148,7 +148,7 @@ def __init__( # check if user is using a config file or dictionary, if file, break the project. if config_filename != "": logger.error("Config legacy files have been deprecated in v3.2. Please use a dictionary instead.") - raise ValueError("Config legacy files have been deprecated in v3.2. Please use a dictionary instead.You can use the utils function update_from_legacy_file") + raise ValueError("Config legacy files have been deprecated in v3.2. Please use a config dictionary instead. You can use the utils function update_from_legacy_file") # Check for alternate config filenames in kwargs if "metadata_filename" in kwargs and config_filename == "": From 5830ef70f9209ce8591a85b6b3dc9a6f1b5ed803 Mon Sep 17 00:00:00 2001 From: Lachlan Grose Date: Wed, 4 Dec 2024 10:22:26 +1100 Subject: [PATCH 3/5] remove legacy format arguments --- map2loop/config.py | 5 +---- map2loop/mapdata.py | 10 +++++----- tests/project/test_plot_hamersley.py | 1 - 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/map2loop/config.py b/map2loop/config.py index 7a19ddfa..5755957c 100644 --- a/map2loop/config.py +++ b/map2loop/config.py @@ -138,14 +138,13 @@ def update_from_dictionary(self, dictionary: dict, lower: bool = False): @beartype.beartype def update_from_file( - self, filename: Union[pathlib.Path, str], legacy_format: bool = False, lower: bool = False + self, filename: Union[pathlib.Path, str], lower: bool = False ): """ Update the config dictionary from the provided json filename or url Args: filename (Union[pathlib.Path, str]): Filename or URL of the JSON config file - legacy_format (bool, optional): Whether the JSON is an old version. Defaults to False. lower (bool, optional): convert keys to lowercase. Defaults to False. """ func = self.update_from_dictionary @@ -207,7 +206,5 @@ def update_from_file( err_string += "Please check the file is accessible online and then\n" else: err_string += "Please check the file exists and is accessible then\n" - if not legacy_format: - err_string += "Also check if this is a legacy config file and add clut_file_legacy=True to the Project function\n" err_string += "Check the contents for mismatched quotes or brackets!" raise Exception(err_string) diff --git a/map2loop/mapdata.py b/map2loop/mapdata.py index fabb3727..59fcd3b3 100644 --- a/map2loop/mapdata.py +++ b/map2loop/mapdata.py @@ -255,7 +255,7 @@ def get_filename(self, datatype: Datatype): @beartype.beartype def set_config_filename( - self, filename: Union[pathlib.Path, str], legacy_format: bool = False, lower: bool = False + self, filename: Union[pathlib.Path, str], lower: bool = False ): """ Set the config filename and update the config structure @@ -263,12 +263,12 @@ def set_config_filename( Args: filename (str): The filename of the config file - legacy_format (bool, optional): - Whether the file is in m2lv2 form. Defaults to False. + lower (bool, optional): + Flag to convert the config file to lowercase. Defaults to False. """ logger.info('Setting config filename to {filename}') - self.config.update_from_file(filename, legacy_format=legacy_format, lower=lower) + self.config.update_from_file(filename, lower=lower) logger.info(f"Config is: {self.config.to_dict()}") def get_config_filename(self): @@ -399,7 +399,7 @@ def set_filenames_from_australian_state(self, state: str): else: self.set_config_filename( - AustraliaStateUrls.aus_config_urls[state], legacy_format=False, lower=lower + AustraliaStateUrls.aus_config_urls[state], lower=lower ) self.set_colour_filename(AustraliaStateUrls.aus_clut_urls[state]) else: diff --git a/tests/project/test_plot_hamersley.py b/tests/project/test_plot_hamersley.py index 7be0bb55..07393f27 100644 --- a/tests/project/test_plot_hamersley.py +++ b/tests/project/test_plot_hamersley.py @@ -23,7 +23,6 @@ def create_project(state_data="WA", projection="EPSG:28350"): use_australian_state_data=state_data, working_projection=projection, bounding_box=bbox_3d, - clut_file_legacy=False, verbose_level=VerboseLevel.NONE, loop_project_filename=loop_project_filename, overwrite_loopprojectfile=True, From b265b3b6efbfd784937667e3c6764daf006af50a Mon Sep 17 00:00:00 2001 From: Lachlan Grose Date: Wed, 4 Dec 2024 10:26:03 +1100 Subject: [PATCH 4/5] add config file path back --- map2loop/project.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/map2loop/project.py b/map2loop/project.py index 027bd019..1dd832b6 100644 --- a/map2loop/project.py +++ b/map2loop/project.py @@ -70,7 +70,6 @@ def __init__( config_filename: Union[pathlib.Path, str] = "", config_dictionary: dict = {}, clut_filename: Union[pathlib.Path, str] = "", - # clut_file_legacy: bool = False, save_pre_checked_map_data: bool = False, loop_project_filename: str = "", overwrite_loopprojectfile: bool = False, @@ -145,11 +144,7 @@ def __init__( self.fold_samples = pandas.DataFrame(columns=["ID", "X", "Y", "Z", "featureId"]) self.geology_samples = pandas.DataFrame(columns=["ID", "X", "Y", "Z", "featureId"]) - # check if user is using a config file or dictionary, if file, break the project. - if config_filename != "": - logger.error("Config legacy files have been deprecated in v3.2. Please use a dictionary instead.") - raise ValueError("Config legacy files have been deprecated in v3.2. Please use a config dictionary instead. You can use the utils function update_from_legacy_file") - + # Check for alternate config filenames in kwargs if "metadata_filename" in kwargs and config_filename == "": config_filename = kwargs["metadata_filename"] @@ -207,11 +202,17 @@ def __init__( if fault_orientation_filename != "": self.map_data.set_filename(Datatype.FAULT_ORIENTATION, fault_orientation_filename) + if config_filename != "": + + + self.map_data.set_config_filename(config_filename) + + if config_dictionary != {}: + self.map_data.config.update_from_dictionary(config_dictionary) if clut_filename != "": self.map_data.set_colour_filename(clut_filename) - #set config dict - self.map_data.config.update_from_dictionary(config_dictionary) + # Load all data (both shape and raster) self.map_data.load_all_map_data() From bc2f2ff1ca14645f935eab810c40da7d11ea21e7 Mon Sep 17 00:00:00 2001 From: AngRodrigues Date: Thu, 5 Dec 2024 13:27:16 +1100 Subject: [PATCH 5/5] fix: allow json files, check for old keys --- map2loop/config.py | 64 ++++++++++++++++++++++++++++++++++++++++++++- map2loop/mapdata.py | 3 ++- map2loop/project.py | 7 +++-- 3 files changed, 68 insertions(+), 6 deletions(-) diff --git a/map2loop/config.py b/map2loop/config.py index 5755957c..48d017d3 100644 --- a/map2loop/config.py +++ b/map2loop/config.py @@ -93,13 +93,19 @@ def to_dict(self): } @beartype.beartype - def update_from_dictionary(self, dictionary: dict, lower: bool = False): + def update_from_dictionary(self, dictionary: dict, lower: bool = True): """ Update the config dictionary from a provided dict Args: dictionary (dict): The dictionary to update from """ + # make sure dictionary doesn't contain legacy keys + self.check_for_legacy_keys(dictionary) + + # make sure it has the minimum requirements + self.validate_config_dictionary(dictionary) + if "structure" in dictionary: self.structure_config.update(dictionary["structure"]) for key in dictionary["structure"].keys(): @@ -108,6 +114,7 @@ def update_from_dictionary(self, dictionary: dict, lower: bool = False): f"Config dictionary structure segment contained {key} which is not used" ) dictionary.pop("structure") + if "geology" in dictionary: self.geology_config.update(dictionary["geology"]) for key in dictionary["geology"].keys(): @@ -208,3 +215,58 @@ def update_from_file( err_string += "Please check the file exists and is accessible then\n" err_string += "Check the contents for mismatched quotes or brackets!" raise Exception(err_string) + + @beartype.beartype + def validate_config_dictionary(self, config_dict: dict) -> None: + """ + Validate the structure and keys of the configuration dictionary. + + Args: + config_dict (dict): The config dictionary to validate. + + Raises: + ValueError: If the dictionary does not meet the minimum requirements for ma2p2loop. + """ + required_keys = { + "structure": {"dipdir_column", "dip_column"}, + "geology": {"unitname_column", "alt_unitname_column"}, + } + + for section, keys in required_keys.items(): + if section not in config_dict: + logger.error(f"Missing required section '{section}' in config dictionary.") + raise ValueError(f"Missing required section '{section}' in config dictionary.") + + for key in keys: + if key not in config_dict[section]: + logger.error( + f"Missing required key '{key}' for '{section}' section of the config dictionary." + ) + raise ValueError( + f"Missing required key '{key}' for '{section}' section of the config dictionary." + ) + + @beartype.beartype + def check_for_legacy_keys(self, config_dict: dict) -> None: + + legacy_keys = { + "otype", "dd", "d", "sf", "bedding", "bo", "btype", "gi", "c", "u", + "g", "g2", "ds", "min", "max", "r1", "r2", "sill", "intrusive", "volcanic", + "f", "fdipnull", "fdipdip_flag", "fdipdir", "fdip", "fdipest", + "fdipest_vals", "n", "ff", "t", "syn" + } + + # Recursively search for keys in the dictionary + def check_keys(d: dict, parent_key=""): + for key, value in d.items(): + if key in legacy_keys: + logger.error( + f"Legacy key found in config - '{key}' at '{parent_key + key}'. Please use the new config format. Use map2loop.utils.update_from_legacy_file to convert between the formats if needed" + ) + raise ValueError( + f"Legacy key found in config - '{key}' at '{parent_key + key}'. Please use the new config format. Use map2loop.utils.update_from_legacy_file to convert between the formats if needed" + ) + if isinstance(value, dict): + check_keys(value, parent_key=f"{parent_key}{key}.") + + check_keys(config_dict) \ No newline at end of file diff --git a/map2loop/mapdata.py b/map2loop/mapdata.py index 59fcd3b3..c1c8b653 100644 --- a/map2loop/mapdata.py +++ b/map2loop/mapdata.py @@ -267,8 +267,9 @@ def set_config_filename( Flag to convert the config file to lowercase. Defaults to False. """ logger.info('Setting config filename to {filename}') - + self.config.update_from_file(filename, lower=lower) + logger.info(f"Config is: {self.config.to_dict()}") def get_config_filename(self): diff --git a/map2loop/project.py b/map2loop/project.py index 1dd832b6..39aac197 100644 --- a/map2loop/project.py +++ b/map2loop/project.py @@ -105,7 +105,7 @@ def __init__( The filename of the configuration json file to use (if not using config_dictionary). Defaults to "". config_dictionary (dict, optional): A dictionary version of the configuration file. Defaults to {}. - clut_filename (str, deprecated): + clut_filename (str, optional): The filename of the colour look up table to use. Defaults to "". save_pre_checked_map_data (bool, optional): A flag to save all map data to file before use. Defaults to False. @@ -201,14 +201,13 @@ def __init__( self.map_data.set_filename(Datatype.DTM, dtm_filename) if fault_orientation_filename != "": self.map_data.set_filename(Datatype.FAULT_ORIENTATION, fault_orientation_filename) - + if config_filename != "": - - self.map_data.set_config_filename(config_filename) if config_dictionary != {}: self.map_data.config.update_from_dictionary(config_dictionary) + if clut_filename != "": self.map_data.set_colour_filename(clut_filename)