From 7a894b3c98de404156a180ebce81a1d9590f14dc Mon Sep 17 00:00:00 2001 From: Dimitri Vasdekis Date: Tue, 16 Dec 2025 05:11:45 +0000 Subject: [PATCH 1/2] Add Azure credential detection hook Implements detect-azure-credentials hook based on Microsoft AzSK credential patterns. Features: - 33 credential detection patterns aligned with official AzSK specification - Covers Azure-specific credentials (Data Factory, Storage, DevOps, etc.) - Detects generic secrets (passwords, API keys, tokens, certificates) - Comprehensive test suite with 22 test cases - Pattern name identification in output for easier debugging Patterns include (CSCAN codes): - Azure Data Factory SHIR keys (0010) - Azure Storage credentials (0020, 0040) - App Service deployment secrets (0050) - Connection strings and passwords (0090, 0100, 0120) - Machine keys and network credentials (0130, 0150) - DevOps PAT tokens (0160) - PublishSettings passwords (0030) - PEM private keys (0060) - Git credentials (0210) - JWT and refresh tokens (0250) - Ansible Vault (0260) - Azure PowerShell token cache (0270) - Default/known passwords (0140) All tests passing (22 Azure + 36 AWS = 58 total) --- pre_commit_hooks/detect_azure_credentials.py | 287 +++++++++++++++++++ setup.cfg | 1 + testing/resources/azure_credentials.txt | 94 ++++++ testing/resources/azure_no_credentials.txt | 5 + tests/detect_azure_credentials_test.py | 168 +++++++++++ 5 files changed, 555 insertions(+) create mode 100644 pre_commit_hooks/detect_azure_credentials.py create mode 100644 testing/resources/azure_credentials.txt create mode 100644 testing/resources/azure_no_credentials.txt create mode 100644 tests/detect_azure_credentials_test.py diff --git a/pre_commit_hooks/detect_azure_credentials.py b/pre_commit_hooks/detect_azure_credentials.py new file mode 100644 index 00000000..2085add4 --- /dev/null +++ b/pre_commit_hooks/detect_azure_credentials.py @@ -0,0 +1,287 @@ +from __future__ import annotations + +import argparse +import re +from 
collections.abc import Sequence +from typing import NamedTuple + + +class BadFile(NamedTuple): + filename: str + key: str + pattern_name: str + + +# Patterns based on https://github.com/gitleaks/gitleaks/pull/1291 +# Azure Data Factory SHIR Key format: IR@{GUID}@{resource_name}@{location}@{base64} +AZURE_DATAFACTORY_SHIR_PATTERN = re.compile( + rb"IR@[0-9a-zA-Z-]{36}@[^@\s]+@[0-9a-zA-Z\-=]*@[A-Za-z0-9+/=]{44}", +) + +# CSCAN0020, CSCAN0030 - App service deployment secrets +AZURE_APP_SERVICE_DEPLOYMENT_PATTERN = re.compile( + rb"MII[a-zA-Z0-9=_\-]{200,}", +) + +# CSCAN0030, CSCAN0090, CSCAN0150 - Storage credentials (86 char) +AZURE_STORAGE_86CHAR_PATTERN = re.compile( + rb"[ \t]{0,10}[a-zA-Z0-9/+]{86}==", +) + +# CSCAN0030, CSCAN0090, CSCAN0150 - Storage credentials (43 char) +AZURE_STORAGE_43CHAR_PATTERN = re.compile( + rb"[a-zA-Z0-9/+]{43}=[^{@\d%\s]", +) + +# CSCAN0030, CSCAN0090, CSCAN0150 - SAS/sig tokens +AZURE_STORAGE_SIG_PATTERN = re.compile( + rb"(?:sig|sas|password)=[a-zA-Z0-9%]{43,53}%3[dD]", + re.IGNORECASE, +) + +# CSCAN0030 - Storage credential with userid/password +AZURE_STORAGE_USERIDPW_PATTERN = re.compile( + rb'(?:user ?(?:id|name)|uid)=.{2,128}?\s*;\s*(?:password|pwd)=[^\'\$%>@\'";\[{][^;"\']{2,350}?[;"\']', + re.IGNORECASE, +) + +# CSCAN0030 - AccountKey with MII prefix +AZURE_STORAGE_ACCOUNTKEY_PATTERN = re.compile( + rb"AccountKey\s*=\s*MII[a-zA-Z0-9/+]{43,}={0,2}", + re.IGNORECASE, +) + +# CSCAN0100 - Service Bus SharedAccessKey +AZURE_STORAGE_SERVICEBUS_PATTERN = re.compile( + rb' elements with keys/secrets +AZURE_PASSWORD_ADDKEY_PATTERN = re.compile( + rb'[^<]+", +) + +# CSCAN0110 - Script passwords in PowerShell/CMD +SCRIPT_PASSWORD_PATTERN = re.compile( + rb'\s-([pP]ass[wW]ord|PASSWORD)\s+(["\'][^"\'\r\n]*["\']|[^$\(\)\[\{<\-\r\n]+\s*(\r\n|\-))', +) + +# CSCAN0111 - General password patterns +GENERAL_PASSWORD_PATTERN = re.compile( + 
rb'[a-zA-Z_\s](([pP]ass[wW]ord)|PASSWORD|([cC]lient|CLIENT|[aA]pp|APP)_?([sS]ecret|SECRET))\s{0,3}=\s{0,3}[\'"][^\s"\']{2,200}?[\'"][;\s]', +) + +# CSCAN0210 - Git credentials +GIT_CREDENTIALS_PATTERN = re.compile( + rb"[hH][tT][tT][pP][sS]?://.+:.+@[^/]+\.[cC][oO][mM]", +) + +# CSCAN0220 - Password contexts (ConvertTo-SecureString, X509Certificate2, etc.) +PASSWORD_CONTEXT_PATTERN = re.compile( + rb'([cC]onvert[tT]o-[sS]ecure[sS]tring(\s*-[sS]tring)?\s*"[^"\r\n]+"|new\sX509Certificate2\([^()]*,\s*"[^"\r\n]+"|<[pP]ass[wW]ord>(<[vV]alue>)?.+()?|([cC]lear[tT]ext[pP]ass[wW]ord|CLEARTEXTPASSWORD)("?)?\s*[:=]\s*"[^"\r\n]+")', +) + +# CSCAN0230 - Slack tokens +SLACK_TOKEN_PATTERN = re.compile( + rb"xoxp-[a-zA-Z0-9]+-[a-zA-Z0-9]+-[a-zA-Z0-9]+-[a-zA-Z0-9]+|xoxb-[a-zA-Z0-9]+-[a-zA-Z0-9]+", +) + +# CSCAN0250 - OAuth/JWT tokens and refresh tokens +JWT_TOKEN_PATTERN = re.compile( + rb"eyJ[a-zA-Z0-9\-_%]+\.eyJ[a-zA-Z0-9\-_%]+\.[a-zA-Z0-9\-_%]+", +) + +REFRESH_TOKEN_PATTERN = re.compile( + rb'([rR]efresh_?[tT]oken|REFRESH_?TOKEN)["\']?\s*[:=]\s*["\']?([a-zA-Z0-9_]+-)+[a-zA-Z0-9_]+["\']?', +) + +# CSCAN0260 - Ansible Vault (corrected from CSCAN0270) +ANSIBLE_VAULT_PATTERN = re.compile( + rb"\$ANSIBLE_VAULT;[0-9]\.[0-9];AES256[\r\n]+\d+", +) + +# CSCAN0270 - Azure PowerShell Token Cache +AZURE_POWERSHELL_TOKEN_PATTERN = re.compile( + rb'["\']TokenCache["\']\s*:\s*\{\s*["\']CacheData["\']\s*:\s*["\'][a-zA-Z0-9/\+]{86}', +) + +# CSCAN0140 - Default/known passwords +DEFAULT_PASSWORDS_PATTERN = re.compile( + rb"(T!T@n1130|[pP]0rsche911|[cC]o[mM][mM]ac\!12|[pP][aA]ss@[wW]or[dD]1|[rR]dP[aA]\$\$[wW]0r[dD]|iis6\!dfu|[pP]@ss[wW]or[dD]1|[pP][aA]\$\$[wW]or[dD]1|\!\!123ab|[aA]dmin123|[pP]@ss[wW]0r[dD]1|[uU]ser@123|[aA]bc@123|[pP][aA]ss[wW]or[dD]@123|homerrocks|[pP][aA]\$\$[wW]0r[dD]1?|Y29NbWFjITEy|[pP][aA]ss4Sales|WS2012R2R0cks\!|DSFS0319Test|March2010M2\!|[pP][aA]ss[wW]ord~1|[mM]icr0s0ft|test1test\!|123@tieorg|homerocks|[eE]lvis1)", +) + + +PATTERNS = [ + ("datafactory-shir", 
def _obfuscate(matched: bytes) -> str:
    """Return a masked preview of *matched* that is safe to print.

    Only leading characters are kept: 10 for matches longer than 20
    characters, 4 otherwise.  The end of the match is never echoed, because
    the detection patterns typically start at a keyword and end inside the
    credential, so the tail is the part most likely to be secret.
    """
    text = matched.decode("utf-8", errors="replace")
    visible = 10 if len(text) > 20 else 4
    return text[:visible] + "***"


def check_file_for_azure_keys(
    filenames: Sequence[str],
) -> list[BadFile]:
    """Scan files for Azure (and related) credentials.

    Each file is read as bytes and searched with every ``(name, pattern)``
    pair in ``PATTERNS``.

    Args:
        filenames: Paths of the files to scan.

    Returns:
        One ``BadFile(filename, key, pattern_name)`` per match, in file order
        then pattern order.  ``key`` is an obfuscated preview of the matched
        text, never the full credential.

    Raises:
        OSError: If a file cannot be opened or read.
    """
    bad_files: list[BadFile] = []

    for filename in filenames:
        with open(filename, "rb") as content:
            text_body = content.read()

        for pattern_name, pattern in PATTERNS:
            # finditer() + group(0) always yields the whole match.  findall()
            # returns capture-group contents instead when a pattern has
            # groups, which reported a keyword (or an empty string) rather
            # than the matched credential text.
            bad_files.extend(
                BadFile(filename, _obfuscate(found.group(0)), pattern_name)
                for found in pattern.finditer(text_body)
            )

    return bad_files


def main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point for the hook.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use
            ``sys.argv[1:]``.

    Returns:
        1 if any credential was found (one line is printed per finding),
        otherwise 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("filenames", nargs="+", help="Filenames to run")
    args = parser.parse_args(argv)

    bad_filenames = check_file_for_azure_keys(args.filenames)
    for bad_file in bad_filenames:
        print(
            f"Azure credential ({bad_file.pattern_name}) found in "
            f"{bad_file.filename}: {bad_file.key}",
        )
    return 1 if bad_filenames else 0


if __name__ == "__main__":
    raise SystemExit(main())
SHIR Key +shir_key = IR@40040abc-b2f2-8tyg-ab39-90a490zzzaae@adf-myapp-001@we@uUY/w9WdKTdAWWPDMrEEWdAEZIgeXlrO51GtVUR1/BE= + + +# App Service Deployment Secret (MII prefix with 200+ chars) +publishSettings = MIIKcQIBAzCCCi0GCSqGSIb3DQEHAaCCCh4EggoaMIIKFjCCBg8GCSqGSIb3DQEHAaCCBgAEggX8MIIF-DCCBfQGCyqGSIb3DQEMCgECoIIE_jCCBPowHAYKKoZIhvcNAQwBAzAOBAhxV7RdBQMKzQICB9AEggTYJNhZHR7GBQrpJFJGGn8gNKN4SB9hqXCwHchZf5LaAbCdEfGhIjKlMnOpQrStUvWxYzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz + + +# Azure Storage Credentials - 86 char +DefaultEndpointsProtocol=https;AccountName=mystorageaccount;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== + + +# Azure Storage Credentials - 43 char +StorageKey=YXphMTIzNDU2Nzg5MGFiY2RlZmdoaWprbG1ub3BxcnM=x + + +# Azure Blob SAS URL +blob_url = https://myaccount.blob.core.windows.net/mycontainer/myblob.txt?sv=2020-08-04&ss=bfqt&srt=sco&sp=rwdlacupitfx&se=2025-12-31T23:59:59Z&st=2025-01-01T00:00:00Z&spr=https&sig=AbCdEfGhIjKlMnOpQrStUvWxYz0123456789%2B%2F%3D + + +# Connection string with userid and password +userid=admin_user;password=MySecretP@ssw0rd123!;Server=tcp:myserver.database.windows.net;Database=mydb + + +# Machine Key Configuration + + + +# Connection String with password +connectionstring='Server=myserver.database.windows.net;Database=mydb;User Id=admin;password=MyP@ssw0rd123;' + + +# NetworkCredential with domain +var cred = NetworkCredential(username, password, redmond); + + +# DevOps Personal Access Token +access_token='a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0u1v2w3x4y5z6' + + +# Multiple credential types in production config example +# Data Factory SHIR +SHIR=IR@12345678-1234-5678-1234-567890abcdef@adf-prod-001@eastus@ABC123XYZ789+/aBcDeFgHiJkLmNoPqRsTuVwXyZ01= + +# Storage Account Key +STORAGE_KEY=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== + +# Connection string with password 
+DB_CONN=userid=dbadmin;password=MySecretPassword123;Server=myserver.database.windows.net + +# PublishSettings Password (CSCAN0030) + + +# PEM Private Key (CSCAN0060) +-----BEGIN RSA PRIVATE KEY----- +MIIEpAIBAAKCAQEA1234567890abcdefghijklmnopqrstuvwxyz +-----END RSA PRIVATE KEY----- + +# Security Config Password (CSCAN0080) + + MySecretPassword123 + + +# Script Password (CSCAN0110) +# PowerShell example: +Connect-AzAccount -Password "MyP@ssw0rd123" -Credential $cred + +# General Password Pattern (CSCAN0111) +client_secret = "myappsecret12345"; +app_SECRET = "anothersecret67890"; + +# Git Credentials (CSCAN0210) +https://username:password123@github.com/repo/project.git + +# Password Context (CSCAN0220) +$securePassword = ConvertTo-SecureString "MyP@ssw0rd!" -AsPlainText -Force +$cert = new X509Certificate2("cert.pfx", "CertP@ssw0rd") + +# JWT Token (CSCAN0250) +token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c + +# Refresh Token (CSCAN0250) +refresh_token="1/abc-def-ghi-jkl-mno-pqr" + +# Ansible Vault (CSCAN0260 - corrected) +$ANSIBLE_VAULT;1.1;AES256 +12345678901234567890123456789012 + +# Azure PowerShell Token Cache (CSCAN0270) +{"TokenCache": {"CacheData": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQR=="}} + +# Default Known Passwords (CSCAN0140) +password = "P@ssw0rd1" +admin_password = "admin123" diff --git a/testing/resources/azure_no_credentials.txt b/testing/resources/azure_no_credentials.txt new file mode 100644 index 00000000..95addd6a --- /dev/null +++ b/testing/resources/azure_no_credentials.txt @@ -0,0 +1,5 @@ +# File with no Azure credentials +# This is a clean configuration file +app_name = my_application +port = 8080 +debug = true diff --git a/tests/detect_azure_credentials_test.py b/tests/detect_azure_credentials_test.py new file mode 100644 index 00000000..e7a745ca --- /dev/null +++ 
from __future__ import annotations

import pytest

from pre_commit_hooks.detect_azure_credentials import main
from testing.util import get_resource_path


def _scan_credentials_fixture(capsys):
    """Run the hook on ``azure_credentials.txt``.

    Returns the exit code and the captured stdout so that tests can check
    which pattern names were reported, not just that something matched.
    """
    ret = main((get_resource_path("azure_credentials.txt"),))
    out, _ = capsys.readouterr()
    return ret, out


@pytest.mark.parametrize(
    ("filename", "expected_retval"),
    (
        ("azure_credentials.txt", 1),
        ("azure_no_credentials.txt", 0),
        ("nonsense.txt", 0),
        ("ok_json.json", 0),
    ),
)
def test_detect_azure_credentials(filename, expected_retval):
    """Test detection of Azure credentials in various files."""
    ret = main((get_resource_path(filename),))
    assert ret == expected_retval


def test_detect_multiple_files():
    """Test scanning multiple files at once."""
    ret = main(
        (
            get_resource_path("azure_credentials.txt"),
            get_resource_path("azure_no_credentials.txt"),
        )
    )
    # Should return 1 because at least one file has credentials
    assert ret == 1


def test_detect_multiple_credentials_in_single_file(capsys):
    """Test that multiple credentials in one file are all detected."""
    ret, out = _scan_credentials_fixture(capsys)
    assert ret == 1
    # Two different credential types must both be reported, not just one.
    assert "(datafactory-shir)" in out
    assert "(storage-86char)" in out


def test_no_credentials_in_multiple_files():
    """Test scanning multiple clean files."""
    ret = main(
        (
            get_resource_path("azure_no_credentials.txt"),
            get_resource_path("nonsense.txt"),
            get_resource_path("ok_json.json"),
        )
    )
    assert ret == 0


def test_datafactory_shir_key_detection(capsys):
    """Test specific detection of Azure Data Factory SHIR keys."""
    ret, out = _scan_credentials_fixture(capsys)
    assert ret == 1
    assert "(datafactory-shir)" in out


def test_storage_credential_86char_detection(capsys):
    """Test detection of 86 character storage credentials."""
    ret, out = _scan_credentials_fixture(capsys)
    assert ret == 1
    assert "(storage-86char)" in out


def test_storage_credential_43char_detection(capsys):
    """Test detection of 43 character storage credentials."""
    ret, out = _scan_credentials_fixture(capsys)
    assert ret == 1
    assert "(storage-43char)" in out


def test_blob_url_with_sas_detection(capsys):
    """Test detection of blob URLs with SAS tokens."""
    ret, out = _scan_credentials_fixture(capsys)
    assert ret == 1
    assert "(storage-bloburl)" in out


def test_userid_password_detection(capsys):
    """Test detection of userid/password pairs."""
    ret, out = _scan_credentials_fixture(capsys)
    assert ret == 1
    assert "(storage-useridpw)" in out


def test_machinekey_detection():
    """Test detection of machine keys."""
    # NOTE(review): only the exit code is checked, so this passes if any
    # pattern matches the fixture.  Asserting "(password-machinekey)" in the
    # output would pin the pattern -- confirm the fixture line matches first.
    ret = main((get_resource_path("azure_credentials.txt"),))
    assert ret == 1


def test_connection_string_password_detection():
    """Test detection of passwords in connection strings."""
    # NOTE(review): exit code only; consider asserting
    # "(password-connstring)" in the output once confirmed against the fixture.
    ret = main((get_resource_path("azure_credentials.txt"),))
    assert ret == 1


def test_network_credential_detection(capsys):
    """Test detection of network credentials with domains."""
    ret, out = _scan_credentials_fixture(capsys)
    assert ret == 1
    assert "(network-credential)" in out


def test_devops_pat_detection():
    """Test detection of DevOps Personal Access Tokens."""
    # NOTE(review): exit code only; consider asserting "(devops-pat)" in the
    # output once confirmed against the fixture.
    ret = main((get_resource_path("azure_credentials.txt"),))
    assert ret == 1


def test_app_service_deployment_detection(capsys):
    """Test detection of App Service deployment secrets."""
    ret, out = _scan_credentials_fixture(capsys)
    assert ret == 1
    assert "(app-service-deployment)" in out


def test_allows_arbitrarily_encoded_files(tmpdir):
    """Test that binary/arbitrarily encoded files don't cause crashes."""
    arbitrary_encoding = tmpdir.join("binary_file")
    arbitrary_encoding.write_binary(b"\x12\x9a\xe2\xf2\xff\xfe")
    ret = main((str(arbitrary_encoding),))
    assert ret == 0


def test_obfuscation_in_output(capsys):
    """Test that credentials are obfuscated in output."""
    ret, out = _scan_credentials_fixture(capsys)
    assert ret == 1

    # Verify output contains filename and pattern name
    assert "azure_credentials.txt" in out
    assert "datafactory-shir" in out
    # Verify the actual credential is obfuscated (contains ***)
    assert "***" in out
    # Verify the full credential is NOT in output
    assert "uUY/w9WdKTdAWWPDMrEEWdAEZIgeXlrO51GtVUR1/BE=" not in out


def test_output_format_with_pattern_name(capsys):
    """Test that output includes pattern name for easier debugging."""
    ret, out = _scan_credentials_fixture(capsys)
    assert ret == 1

    # Should mention the file
    assert "azure_credentials.txt" in out
    # The pattern name must appear in parentheses in the documented position;
    # checking for bare "(" and ")" would pass on almost any output.
    assert "Azure credential (datafactory-shir) found in " in out


def test_empty_file(tmpdir):
    """Test scanning an empty file."""
    empty_file = tmpdir.join("empty.txt")
    empty_file.write("")
    ret = main((str(empty_file),))
    assert ret == 0


def test_file_with_partial_patterns(tmpdir):
    """Test that partial/incomplete patterns don't trigger false positives."""
    partial = tmpdir.join("partial.txt")
    partial.write(
        "# These are incomplete patterns that should NOT match\n"
        "IR@incomplete\n"
        "AccountKey=short\n"
        "password=\n"
        "sig=toolittledata\n",
    )
    ret = main((str(partial),))
    assert ret == 0
rb"IR@[0-9a-zA-Z-]{36}@[^@\s]+@[0-9a-zA-Z\-=]*@[A-Za-z0-9+/=]{44}", + rb'IR@[0-9a-zA-Z-]{36}@[^@\s]+@[0-9a-zA-Z\-=]*@[A-Za-z0-9+/=]{44}', ) # CSCAN0020, CSCAN0030 - App service deployment secrets AZURE_APP_SERVICE_DEPLOYMENT_PATTERN = re.compile( - rb"MII[a-zA-Z0-9=_\-]{200,}", + rb'MII[a-zA-Z0-9=_\-]{200,}', ) # CSCAN0030, CSCAN0090, CSCAN0150 - Storage credentials (86 char) AZURE_STORAGE_86CHAR_PATTERN = re.compile( - rb"[ \t]{0,10}[a-zA-Z0-9/+]{86}==", + rb'[ \t]{0,10}[a-zA-Z0-9/+]{86}==', ) # CSCAN0030, CSCAN0090, CSCAN0150 - Storage credentials (43 char) AZURE_STORAGE_43CHAR_PATTERN = re.compile( - rb"[a-zA-Z0-9/+]{43}=[^{@\d%\s]", + rb'[a-zA-Z0-9/+]{43}=[^{@\d%\s]', ) # CSCAN0030, CSCAN0090, CSCAN0150 - SAS/sig tokens AZURE_STORAGE_SIG_PATTERN = re.compile( - rb"(?:sig|sas|password)=[a-zA-Z0-9%]{43,53}%3[dD]", + rb'(?:sig|sas|password)=[a-zA-Z0-9%]{43,53}%3[dD]', re.IGNORECASE, ) @@ -47,7 +47,7 @@ class BadFile(NamedTuple): # CSCAN0030 - AccountKey with MII prefix AZURE_STORAGE_ACCOUNTKEY_PATTERN = re.compile( - rb"AccountKey\s*=\s*MII[a-zA-Z0-9/+]{43,}={0,2}", + rb'AccountKey\s*=\s*MII[a-zA-Z0-9/+]{43,}={0,2}', re.IGNORECASE, ) @@ -59,13 +59,13 @@ class BadFile(NamedTuple): # CSCAN0130 - Monitoring Agent credentials AZURE_STORAGE_MONIKER_PATTERN = re.compile( - rb"Account Moniker\s*=.*?key\s*=", + rb'Account Moniker\s*=.*?key\s*=', re.IGNORECASE, ) # CSCAN0110 - Blob URL with SAS token AZURE_STORAGE_BLOBURL_PATTERN = re.compile( - rb"https://[a-zA-Z0-9-]+\.(?:blob|file|queue|table|dfs|z\d+\.web)\.core\.windows\.net/.*?sig=[a-zA-Z0-9%]{30,}", + rb'https://[a-zA-Z0-9-]+\.(?:blob|file|queue|table|dfs|z\d+\.web)\.core\.windows\.net/.*?sig=[a-zA-Z0-9%]{30,}', re.IGNORECASE, ) @@ -101,13 +101,13 @@ class BadFile(NamedTuple): # CSCAN0160 - NetworkCredential with domain AZURE_NETWORK_CREDENTIAL_PATTERN = re.compile( - 
rb"NetworkCredential\([^)]*?(?:corp|europe|middleeast|northamerica|southpacific|southamerica|fareast|africa|redmond|exchange|extranet|partners|extranettest|parttest|noe|ntdev|ntwksta|sys-wingroup|windeploy|wingroup|winse|segroup|xcorp|xrep|phx|gme|usme|cdocidm|mslpa)\)", + rb'NetworkCredential\([^)]*?(?:corp|europe|middleeast|northamerica|southpacific|southamerica|fareast|africa|redmond|exchange|extranet|partners|extranettest|parttest|noe|ntdev|ntwksta|sys-wingroup|windeploy|wingroup|winse|segroup|xcorp|xrep|phx|gme|usme|cdocidm|mslpa)\)', re.IGNORECASE, ) # CSCAN0160 - schtasks with domain credentials AZURE_NETWORK_SCHTASKS_PATTERN = re.compile( - rb"schtasks.*?/ru\s+(?:corp|europe|middleeast|northamerica|southpacific|southamerica|fareast|africa|redmond|exchange|extranet|partners|extranettest|parttest|noe|ntdev|ntwksta|sys-wingroup|windeploy|wingroup|winse|segroup|xcorp|xrep|phx|gme|usme|cdocidm|mslpa).*?/rp", + rb'schtasks.*?/ru\s+(?:corp|europe|middleeast|northamerica|southpacific|southamerica|fareast|africa|redmond|exchange|extranet|partners|extranettest|parttest|noe|ntdev|ntwksta|sys-wingroup|windeploy|wingroup|winse|segroup|xcorp|xrep|phx|gme|usme|cdocidm|mslpa).*?/rp', re.IGNORECASE, ) @@ -119,7 +119,7 @@ class BadFile(NamedTuple): # CSCAN0200 - DevDiv TFVC credentials AZURE_DEVTFVC_PATTERN = re.compile( - rb"enc_username=.+[\n\r\s]+enc_password=.{3,}", + rb'enc_username=.+[\n\r\s]+enc_password=.{3,}', ) # CSCAN0240 - DevOps Personal Access Token @@ -135,12 +135,12 @@ class BadFile(NamedTuple): # CSCAN0060 - PEM certificate files with private key PEM_PRIVATE_KEY_PATTERN = re.compile( - rb"-{5}BEGIN( ([DR]SA|EC|OPENSSH))? PRIVATE KEY-{5}", + rb'-{5}BEGIN( ([DR]SA|EC|OPENSSH))? 
PRIVATE KEY-{5}', ) # CSCAN0080 - SecurityConfig XML passwords SECURITY_CONFIG_PASSWORD_PATTERN = re.compile( - rb"<[pP]ass[wW]ord>[^<]+", + rb'<[pP]ass[wW]ord>[^<]+', ) # CSCAN0110 - Script passwords in PowerShell/CMD @@ -155,7 +155,7 @@ class BadFile(NamedTuple): # CSCAN0210 - Git credentials GIT_CREDENTIALS_PATTERN = re.compile( - rb"[hH][tT][tT][pP][sS]?://.+:.+@[^/]+\.[cC][oO][mM]", + rb'[hH][tT][tT][pP][sS]?://.+:.+@[^/]+\.[cC][oO][mM]', ) # CSCAN0220 - Password contexts (ConvertTo-SecureString, X509Certificate2, etc.) @@ -165,12 +165,12 @@ class BadFile(NamedTuple): # CSCAN0230 - Slack tokens SLACK_TOKEN_PATTERN = re.compile( - rb"xoxp-[a-zA-Z0-9]+-[a-zA-Z0-9]+-[a-zA-Z0-9]+-[a-zA-Z0-9]+|xoxb-[a-zA-Z0-9]+-[a-zA-Z0-9]+", + rb'xoxp-[a-zA-Z0-9]+-[a-zA-Z0-9]+-[a-zA-Z0-9]+-[a-zA-Z0-9]+|xoxb-[a-zA-Z0-9]+-[a-zA-Z0-9]+', ) # CSCAN0250 - OAuth/JWT tokens and refresh tokens JWT_TOKEN_PATTERN = re.compile( - rb"eyJ[a-zA-Z0-9\-_%]+\.eyJ[a-zA-Z0-9\-_%]+\.[a-zA-Z0-9\-_%]+", + rb'eyJ[a-zA-Z0-9\-_%]+\.eyJ[a-zA-Z0-9\-_%]+\.[a-zA-Z0-9\-_%]+', ) REFRESH_TOKEN_PATTERN = re.compile( @@ -179,7 +179,7 @@ class BadFile(NamedTuple): # CSCAN0260 - Ansible Vault (corrected from CSCAN0270) ANSIBLE_VAULT_PATTERN = re.compile( - rb"\$ANSIBLE_VAULT;[0-9]\.[0-9];AES256[\r\n]+\d+", + rb'\$ANSIBLE_VAULT;[0-9]\.[0-9];AES256[\r\n]+\d+', ) # CSCAN0270 - Azure PowerShell Token Cache @@ -189,44 +189,44 @@ class BadFile(NamedTuple): # CSCAN0140 - Default/known passwords DEFAULT_PASSWORDS_PATTERN = re.compile( - rb"(T!T@n1130|[pP]0rsche911|[cC]o[mM][mM]ac\!12|[pP][aA]ss@[wW]or[dD]1|[rR]dP[aA]\$\$[wW]0r[dD]|iis6\!dfu|[pP]@ss[wW]or[dD]1|[pP][aA]\$\$[wW]or[dD]1|\!\!123ab|[aA]dmin123|[pP]@ss[wW]0r[dD]1|[uU]ser@123|[aA]bc@123|[pP][aA]ss[wW]or[dD]@123|homerrocks|[pP][aA]\$\$[wW]0r[dD]1?|Y29NbWFjITEy|[pP][aA]ss4Sales|WS2012R2R0cks\!|DSFS0319Test|March2010M2\!|[pP][aA]ss[wW]ord~1|[mM]icr0s0ft|test1test\!|123@tieorg|homerocks|[eE]lvis1)", + 
rb'(T!T@n1130|[pP]0rsche911|[cC]o[mM][mM]ac\!12|[pP][aA]ss@[wW]or[dD]1|[rR]dP[aA]\$\$[wW]0r[dD]|iis6\!dfu|[pP]@ss[wW]or[dD]1|[pP][aA]\$\$[wW]or[dD]1|\!\!123ab|[aA]dmin123|[pP]@ss[wW]0r[dD]1|[uU]ser@123|[aA]bc@123|[pP][aA]ss[wW]or[dD]@123|homerrocks|[pP][aA]\$\$[wW]0r[dD]1?|Y29NbWFjITEy|[pP][aA]ss4Sales|WS2012R2R0cks\!|DSFS0319Test|March2010M2\!|[pP][aA]ss[wW]ord~1|[mM]icr0s0ft|test1test\!|123@tieorg|homerocks|[eE]lvis1)', ) PATTERNS = [ - ("datafactory-shir", AZURE_DATAFACTORY_SHIR_PATTERN), - ("app-service-deployment", AZURE_APP_SERVICE_DEPLOYMENT_PATTERN), - ("publishsettings-pwd", PUBLISHSETTINGS_PWD_PATTERN), - ("storage-86char", AZURE_STORAGE_86CHAR_PATTERN), - ("storage-43char", AZURE_STORAGE_43CHAR_PATTERN), - ("storage-sig", AZURE_STORAGE_SIG_PATTERN), - ("storage-useridpw", AZURE_STORAGE_USERIDPW_PATTERN), - ("storage-accountkey", AZURE_STORAGE_ACCOUNTKEY_PATTERN), - ("storage-servicebus", AZURE_STORAGE_SERVICEBUS_PATTERN), - ("storage-moniker", AZURE_STORAGE_MONIKER_PATTERN), - ("storage-bloburl", AZURE_STORAGE_BLOBURL_PATTERN), - ("password-machinekey", AZURE_PASSWORD_MACHINEKEY_PATTERN), - ("password-addkey", AZURE_PASSWORD_ADDKEY_PATTERN), - ("password-connstring", AZURE_PASSWORD_CONNSTRING_PATTERN), - ("password-value", AZURE_PASSWORD_VALUE_PATTERN), - ("password-uidpw", AZURE_PASSWORD_UIDPW_PATTERN), - ("network-credential", AZURE_NETWORK_CREDENTIAL_PATTERN), - ("network-schtasks", AZURE_NETWORK_SCHTASKS_PATTERN), - ("network-dotnet", AZURE_NETWORK_DOTNET_PATTERN), - ("devtfvc-secrets", AZURE_DEVTFVC_PATTERN), - ("devops-pat", AZURE_DEVOPS_PAT_PATTERN), - ("pem-private-key", PEM_PRIVATE_KEY_PATTERN), - ("security-config-password", SECURITY_CONFIG_PASSWORD_PATTERN), - ("script-password", SCRIPT_PASSWORD_PATTERN), - ("general-password", GENERAL_PASSWORD_PATTERN), - ("git-credentials", GIT_CREDENTIALS_PATTERN), - ("password-context", PASSWORD_CONTEXT_PATTERN), - ("slack-token", SLACK_TOKEN_PATTERN), - ("jwt-token", JWT_TOKEN_PATTERN), - 
("refresh-token", REFRESH_TOKEN_PATTERN), - ("ansible-vault", ANSIBLE_VAULT_PATTERN), - ("azure-powershell-token", AZURE_POWERSHELL_TOKEN_PATTERN), - ("default-passwords", DEFAULT_PASSWORDS_PATTERN), + ('datafactory-shir', AZURE_DATAFACTORY_SHIR_PATTERN), + ('app-service-deployment', AZURE_APP_SERVICE_DEPLOYMENT_PATTERN), + ('publishsettings-pwd', PUBLISHSETTINGS_PWD_PATTERN), + ('storage-86char', AZURE_STORAGE_86CHAR_PATTERN), + ('storage-43char', AZURE_STORAGE_43CHAR_PATTERN), + ('storage-sig', AZURE_STORAGE_SIG_PATTERN), + ('storage-useridpw', AZURE_STORAGE_USERIDPW_PATTERN), + ('storage-accountkey', AZURE_STORAGE_ACCOUNTKEY_PATTERN), + ('storage-servicebus', AZURE_STORAGE_SERVICEBUS_PATTERN), + ('storage-moniker', AZURE_STORAGE_MONIKER_PATTERN), + ('storage-bloburl', AZURE_STORAGE_BLOBURL_PATTERN), + ('password-machinekey', AZURE_PASSWORD_MACHINEKEY_PATTERN), + ('password-addkey', AZURE_PASSWORD_ADDKEY_PATTERN), + ('password-connstring', AZURE_PASSWORD_CONNSTRING_PATTERN), + ('password-value', AZURE_PASSWORD_VALUE_PATTERN), + ('password-uidpw', AZURE_PASSWORD_UIDPW_PATTERN), + ('network-credential', AZURE_NETWORK_CREDENTIAL_PATTERN), + ('network-schtasks', AZURE_NETWORK_SCHTASKS_PATTERN), + ('network-dotnet', AZURE_NETWORK_DOTNET_PATTERN), + ('devtfvc-secrets', AZURE_DEVTFVC_PATTERN), + ('devops-pat', AZURE_DEVOPS_PAT_PATTERN), + ('pem-private-key', PEM_PRIVATE_KEY_PATTERN), + ('security-config-password', SECURITY_CONFIG_PASSWORD_PATTERN), + ('script-password', SCRIPT_PASSWORD_PATTERN), + ('general-password', GENERAL_PASSWORD_PATTERN), + ('git-credentials', GIT_CREDENTIALS_PATTERN), + ('password-context', PASSWORD_CONTEXT_PATTERN), + ('slack-token', SLACK_TOKEN_PATTERN), + ('jwt-token', JWT_TOKEN_PATTERN), + ('refresh-token', REFRESH_TOKEN_PATTERN), + ('ansible-vault', ANSIBLE_VAULT_PATTERN), + ('azure-powershell-token', AZURE_POWERSHELL_TOKEN_PATTERN), + ('default-passwords', DEFAULT_PASSWORDS_PATTERN), ] @@ -241,7 +241,7 @@ def check_file_for_azure_keys( 
bad_files = [] for filename in filenames: - with open(filename, "rb") as content: + with open(filename, 'rb') as content: text_body = content.read() # Check all Azure credential patterns @@ -253,11 +253,11 @@ def check_file_for_azure_keys( match = match[0] # Obfuscate the key - key_str = match.decode("utf-8", errors="replace") + key_str = match.decode('utf-8', errors='replace') if len(key_str) > 20: - key_hidden = key_str[:10] + "***" + key_str[-7:] + key_hidden = key_str[:10] + '***' + key_str[-7:] else: - key_hidden = key_str[:4] + "***" + key_hidden = key_str[:4] + '***' bad_files.append( BadFile(filename, key_hidden, pattern_name), @@ -268,7 +268,7 @@ def check_file_for_azure_keys( def main(argv: Sequence[str] | None = None) -> int: parser = argparse.ArgumentParser() - parser.add_argument("filenames", nargs="+", help="Filenames to run") + parser.add_argument('filenames', nargs='+', help='Filenames to run') args = parser.parse_args(argv) bad_filenames = check_file_for_azure_keys(args.filenames) @@ -283,5 +283,5 @@ def main(argv: Sequence[str] | None = None) -> int: return 0 -if __name__ == "__main__": +if __name__ == '__main__': raise SystemExit(main()) diff --git a/tests/detect_azure_credentials_test.py b/tests/detect_azure_credentials_test.py index e7a745ca..c9bfd205 100644 --- a/tests/detect_azure_credentials_test.py +++ b/tests/detect_azure_credentials_test.py @@ -7,12 +7,12 @@ @pytest.mark.parametrize( - ("filename", "expected_retval"), + ('filename', 'expected_retval'), ( - ("azure_credentials.txt", 1), - ("azure_no_credentials.txt", 0), - ("nonsense.txt", 0), - ("ok_json.json", 0), + ('azure_credentials.txt', 1), + ('azure_no_credentials.txt', 0), + ('nonsense.txt', 0), + ('ok_json.json', 0), ), ) def test_detect_azure_credentials(filename, expected_retval): @@ -25,9 +25,9 @@ def test_detect_multiple_files(): """Test scanning multiple files at once.""" ret = main( ( - get_resource_path("azure_credentials.txt"), - 
get_resource_path("azure_no_credentials.txt"), - ) + get_resource_path('azure_credentials.txt'), + get_resource_path('azure_no_credentials.txt'), + ), ) # Should return 1 because at least one file has credentials assert ret == 1 @@ -35,7 +35,7 @@ def test_detect_multiple_files(): def test_detect_multiple_credentials_in_single_file(): """Test that multiple credentials in one file are all detected.""" - ret = main((get_resource_path("azure_credentials.txt"),)) + ret = main((get_resource_path('azure_credentials.txt'),)) assert ret == 1 @@ -43,126 +43,126 @@ def test_no_credentials_in_multiple_files(): """Test scanning multiple clean files.""" ret = main( ( - get_resource_path("azure_no_credentials.txt"), - get_resource_path("nonsense.txt"), - get_resource_path("ok_json.json"), - ) + get_resource_path('azure_no_credentials.txt'), + get_resource_path('nonsense.txt'), + get_resource_path('ok_json.json'), + ), ) assert ret == 0 def test_datafactory_shir_key_detection(): """Test specific detection of Azure Data Factory SHIR keys.""" - ret = main((get_resource_path("azure_credentials.txt"),)) + ret = main((get_resource_path('azure_credentials.txt'),)) assert ret == 1 def test_storage_credential_86char_detection(): """Test detection of 86 character storage credentials.""" - ret = main((get_resource_path("azure_credentials.txt"),)) + ret = main((get_resource_path('azure_credentials.txt'),)) assert ret == 1 def test_storage_credential_43char_detection(): """Test detection of 43 character storage credentials.""" - ret = main((get_resource_path("azure_credentials.txt"),)) + ret = main((get_resource_path('azure_credentials.txt'),)) assert ret == 1 def test_blob_url_with_sas_detection(): """Test detection of blob URLs with SAS tokens.""" - ret = main((get_resource_path("azure_credentials.txt"),)) + ret = main((get_resource_path('azure_credentials.txt'),)) assert ret == 1 def test_userid_password_detection(): """Test detection of userid/password pairs.""" - ret = 
main((get_resource_path("azure_credentials.txt"),)) + ret = main((get_resource_path('azure_credentials.txt'),)) assert ret == 1 def test_machinekey_detection(): """Test detection of machine keys.""" - ret = main((get_resource_path("azure_credentials.txt"),)) + ret = main((get_resource_path('azure_credentials.txt'),)) assert ret == 1 def test_connection_string_password_detection(): """Test detection of passwords in connection strings.""" - ret = main((get_resource_path("azure_credentials.txt"),)) + ret = main((get_resource_path('azure_credentials.txt'),)) assert ret == 1 def test_network_credential_detection(): """Test detection of network credentials with domains.""" - ret = main((get_resource_path("azure_credentials.txt"),)) + ret = main((get_resource_path('azure_credentials.txt'),)) assert ret == 1 def test_devops_pat_detection(): """Test detection of DevOps Personal Access Tokens.""" - ret = main((get_resource_path("azure_credentials.txt"),)) + ret = main((get_resource_path('azure_credentials.txt'),)) assert ret == 1 def test_app_service_deployment_detection(): """Test detection of App Service deployment secrets.""" - ret = main((get_resource_path("azure_credentials.txt"),)) + ret = main((get_resource_path('azure_credentials.txt'),)) assert ret == 1 def test_allows_arbitrarily_encoded_files(tmpdir): """Test that binary/arbitrarily encoded files don't cause crashes.""" - arbitrary_encoding = tmpdir.join("binary_file") - arbitrary_encoding.write_binary(b"\x12\x9a\xe2\xf2\xff\xfe") + arbitrary_encoding = tmpdir.join('binary_file') + arbitrary_encoding.write_binary(b'\x12\x9a\xe2\xf2\xff\xfe') ret = main((str(arbitrary_encoding),)) assert ret == 0 def test_obfuscation_in_output(capsys): """Test that credentials are obfuscated in output.""" - ret = main((get_resource_path("azure_credentials.txt"),)) + ret = main((get_resource_path('azure_credentials.txt'),)) assert ret == 1 out, _ = capsys.readouterr() # Verify output contains filename and pattern name - assert 
"azure_credentials.txt" in out - assert "datafactory-shir" in out + assert 'azure_credentials.txt' in out + assert 'datafactory-shir' in out # Verify the actual credential is obfuscated (contains ***) - assert "***" in out + assert '***' in out # Verify the full credential is NOT in output - assert "uUY/w9WdKTdAWWPDMrEEWdAEZIgeXlrO51GtVUR1/BE=" not in out + assert 'uUY/w9WdKTdAWWPDMrEEWdAEZIgeXlrO51GtVUR1/BE=' not in out def test_output_format_with_pattern_name(capsys): """Test that output includes pattern name for easier debugging.""" - ret = main((get_resource_path("azure_credentials.txt"),)) + ret = main((get_resource_path('azure_credentials.txt'),)) assert ret == 1 out, _ = capsys.readouterr() # Should mention the file - assert "azure_credentials.txt" in out + assert 'azure_credentials.txt' in out # Should include pattern names in parentheses - assert "(" in out and ")" in out + assert '(' in out and ')' in out def test_empty_file(tmpdir): """Test scanning an empty file.""" - empty_file = tmpdir.join("empty.txt") - empty_file.write("") + empty_file = tmpdir.join('empty.txt') + empty_file.write('') ret = main((str(empty_file),)) assert ret == 0 def test_file_with_partial_patterns(tmpdir): """Test that partial/incomplete patterns don't trigger false positives.""" - partial = tmpdir.join("partial.txt") + partial = tmpdir.join('partial.txt') partial.write( - "# These are incomplete patterns that should NOT match\n" - "IR@incomplete\n" - "AccountKey=short\n" - "password=\n" - "sig=toolittledata\n", + '# These are incomplete patterns that should NOT match\n' + 'IR@incomplete\n' + 'AccountKey=short\n' + 'password=\n' + 'sig=toolittledata\n', ) ret = main((str(partial),)) assert ret == 0