diff --git a/cpp/ql/lib/experimental/Quantum/Language.qll b/cpp/ql/lib/experimental/Quantum/Language.qll new file mode 100644 index 000000000000..8f400858f07c --- /dev/null +++ b/cpp/ql/lib/experimental/Quantum/Language.qll @@ -0,0 +1,12 @@ +private import codeql.cryptography.Model +private import cpp as Lang + +module CryptoInput implements InputSig { + class LocatableElement = Lang::Locatable; + + class UnknownLocation = Lang::UnknownDefaultLocation; +} + +module Crypto = CryptographyBase; + +import OpenSSL diff --git a/cpp/ql/lib/experimental/Quantum/OpenSSL.qll b/cpp/ql/lib/experimental/Quantum/OpenSSL.qll new file mode 100644 index 000000000000..821fc0aec8ff --- /dev/null +++ b/cpp/ql/lib/experimental/Quantum/OpenSSL.qll @@ -0,0 +1,244 @@ +import cpp +import semmle.code.cpp.dataflow.new.DataFlow + +module OpenSSLModel { + import Language + + /** + * Hash function references in OpenSSL. + */ + predicate hash_ref_type_mapping_known(string name, Crypto::THashType algo) { + // `ma` name has an LN_ or SN_ prefix, which we want to ignore + // capture any name after the _ prefix using regex matching + name = ["sha1", "sha160"] and algo instanceof Crypto::SHA1 + or + name = ["sha224", "sha256", "sha384", "sha512"] and algo instanceof Crypto::SHA2 + or + name = ["sha3-224", "sha3-256", "sha3-384", "sha3-512"] and algo instanceof Crypto::SHA3 + or + name = "md2" and algo instanceof Crypto::MD2 + or + name = "md4" and algo instanceof Crypto::MD4 + or + name = "md5" and algo instanceof Crypto::MD5 + or + name = "ripemd160" and algo instanceof Crypto::RIPEMD160 + or + name = "whirlpool" and algo instanceof Crypto::WHIRLPOOL + } + + predicate hash_ref_type_mapping(FunctionCallOrMacroAccess ref, string name, Crypto::THashType algo) { + name = ref.getTargetName().regexpCapture("(?:SN_|LN_|EVP_)([a-z0-9]+)", 1) and + hash_ref_type_mapping_known(name, algo) + } + + class FunctionCallOrMacroAccess extends Element { + FunctionCallOrMacroAccess() { this instanceof FunctionCall or this instanceof MacroAccess } + + string getTargetName() { + result = this.(FunctionCall).getTarget().getName() + or + result = this.(MacroAccess).getMacroName() + } + } + + class HashAlgorithmCallOrMacro extends Crypto::HashAlgorithmInstance instanceof FunctionCallOrMacroAccess + { + HashAlgorithmCallOrMacro() { hash_ref_type_mapping(this, _, _) } + + string getTargetName() { result = this.(FunctionCallOrMacroAccess).getTargetName() } + } + + class HashAlgorithm extends Crypto::HashAlgorithm { + HashAlgorithmCallOrMacro instance; + + HashAlgorithm() { this = Crypto::THashAlgorithm(instance) } + + override string getSHA2OrSHA3DigestSize(Location location) { + ( + this.getHashType() instanceof Crypto::SHA2 or + this.getHashType() instanceof Crypto::SHA3 + ) and + exists(string name | + hash_ref_type_mapping(instance, name, this.getHashType()) and + result = name.regexpFind("\\d{3}", 0, _) and + location = instance.getLocation() + ) + } + + override string getRawAlgorithmName() { result = instance.getTargetName() } + + override Crypto::THashType getHashType() { hash_ref_type_mapping(instance, _, result) } + + Element getInstance() { result = instance } + + override Location getLocation() { result = instance.getLocation() } + } + + /** + * Data-flow configuration for key derivation algorithm flow to EVP_KDF_derive. + */ + module AlgorithmToEVPKeyDeriveConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + source.asExpr() = any(KeyDerivationAlgorithm a).getInstance() + } + + predicate isSink(DataFlow::Node sink) { + exists(EVP_KDF_derive kdo | + sink.asExpr() = kdo.getCall().getAlgorithmArg() + or + sink.asExpr() = kdo.getCall().getContextArg() // via `EVP_KDF_CTX_set_params` + ) + } + + predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + none() // TODO + } + } + + module AlgorithmToEVPKeyDeriveFlow = DataFlow::Global; + + predicate algorithm_to_EVP_KDF_derive(KeyDerivationAlgorithm algo, EVP_KDF_derive derive) { + none() + } + + /** + * Key derivation operation (e.g., `EVP_KDF_derive`) + */ + class EVP_KDF_derive_FunctionCall extends Crypto::KeyDerivationOperationInstance instanceof FunctionCall + { + EVP_KDF_derive_FunctionCall() { this.getTarget().getName() = "EVP_KDF_derive" } + + Expr getAlgorithmArg() { result = super.getArgument(3) } + + Expr getContextArg() { result = super.getArgument(0) } + } + + class EVP_KDF_derive extends Crypto::KeyDerivationOperation { + EVP_KDF_derive_FunctionCall instance; + + EVP_KDF_derive() { this = Crypto::TKeyDerivationOperation(instance) } + + override Crypto::Algorithm getAlgorithm() { algorithm_to_EVP_KDF_derive(result, this) } + + EVP_KDF_derive_FunctionCall getCall() { result = instance } + } + + /** + * Key derivation algorithm nodes + */ + abstract class KeyDerivationAlgorithm extends Crypto::KeyDerivationAlgorithm { + abstract Expr getInstance(); + } + + /** + * `EVP_KDF_fetch` returns a key derivation algorithm. + */ + class EVP_KDF_fetch_Call extends FunctionCall { + EVP_KDF_fetch_Call() { this.getTarget().getName() = "EVP_KDF_fetch" } + + Expr getAlgorithmArg() { result = this.getArgument(1) } + } + + class EVP_KDF_fetch_AlgorithmArg extends Crypto::KeyDerivationAlgorithmInstance instanceof Expr { + EVP_KDF_fetch_AlgorithmArg() { exists(EVP_KDF_fetch_Call call | this = call.getAlgorithmArg()) } + } + + predicate kdf_names(string algo) { algo = ["HKDF", "PKCS12KDF", "PBKDF2"] } + + class KDFAlgorithmStringLiteral extends StringLiteral { + KDFAlgorithmStringLiteral() { kdf_names(this.getValue().toUpperCase()) } + } + + private module AlgorithmStringToFetchConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node src) { src.asExpr() instanceof KDFAlgorithmStringLiteral } + + predicate isSink(DataFlow::Node sink) { sink.asExpr() instanceof EVP_KDF_fetch_AlgorithmArg } + } + + module AlgorithmStringToFetchFlow = DataFlow::Global; + + predicate algorithmStringToKDFFetchArgFlow( + string name, KDFAlgorithmStringLiteral origin, EVP_KDF_fetch_AlgorithmArg arg + ) { + origin.getValue().toUpperCase() = name and + AlgorithmStringToFetchFlow::flow(DataFlow::exprNode(origin), DataFlow::exprNode(arg)) + } + + /** + * HKDF key derivation algorithm. + */ + class HKDF extends KeyDerivationAlgorithm, Crypto::HKDF { + KDFAlgorithmStringLiteral origin; + EVP_KDF_fetch_AlgorithmArg instance; + + HKDF() { + this = Crypto::TKeyDerivationAlgorithm(instance) and + algorithmStringToKDFFetchArgFlow("HKDF", origin, instance) + } + + override string getRawAlgorithmName() { result = origin.getValue() } + + override Crypto::HashAlgorithm getHashAlgorithm() { none() } + + override Crypto::LocatableElement getOrigin(string name) { + result = origin and name = origin.toString() + } + + override Expr getInstance() { result = origin } + } + + /** + * PBKDF2 key derivation algorithm. + */ + class PBKDF2 extends KeyDerivationAlgorithm, Crypto::PBKDF2 { + KDFAlgorithmStringLiteral origin; + EVP_KDF_fetch_AlgorithmArg instance; + + PBKDF2() { + this = Crypto::TKeyDerivationAlgorithm(instance) and + algorithmStringToKDFFetchArgFlow("PBKDF2", origin, instance) + } + + override string getRawAlgorithmName() { result = origin.getValue() } + + override string getIterationCount(Location location) { none() } // TODO + + override string getKeyLength(Location location) { none() } // TODO + + override Crypto::HashAlgorithm getHashAlgorithm() { none() } // TODO + + override Crypto::LocatableElement getOrigin(string name) { + result = origin and name = origin.toString() + } + + override Expr getInstance() { result = instance } + } + + /** + * PKCS12KDF key derivation algorithm. + */ + class PKCS12KDF extends KeyDerivationAlgorithm, Crypto::PKCS12KDF { + KDFAlgorithmStringLiteral origin; + EVP_KDF_fetch_AlgorithmArg instance; + + PKCS12KDF() { + this = Crypto::TKeyDerivationAlgorithm(instance) and + algorithmStringToKDFFetchArgFlow("PKCS12KDF", origin, instance) + } + + override string getRawAlgorithmName() { result = origin.getValue() } + + override string getIterationCount(Location location) { none() } // TODO + + override string getIDByte(Location location) { none() } // TODO + + override Crypto::HashAlgorithm getHashAlgorithm() { none() } // TODO + + override Crypto::LocatableElement getOrigin(string name) { + result = origin and name = origin.toString() + } + + override Expr getInstance() { result = instance } + } +} diff --git a/cpp/ql/lib/qlpack.yml b/cpp/ql/lib/qlpack.yml index 6ffc77714d47..489de9f6cef4 100644 --- a/cpp/ql/lib/qlpack.yml +++ b/cpp/ql/lib/qlpack.yml @@ -6,6 +6,7 @@ extractor: cpp library: true upgrades: upgrades dependencies: + codeql/cryptography: ${workspace} codeql/dataflow: ${workspace} codeql/mad: ${workspace} codeql/rangeanalysis: ${workspace} diff --git a/cpp/ql/src/experimental/Quantum/PrintCBOMGraph.ql b/cpp/ql/src/experimental/Quantum/PrintCBOMGraph.ql new file mode 100644 index 000000000000..d9658105aebf --- /dev/null +++ b/cpp/ql/src/experimental/Quantum/PrintCBOMGraph.ql @@ -0,0 +1,21 @@ +/** + * @name Print CBOM Graph + * @description Outputs a graph representation of the cryptographic bill of materials. + * This query only supports DGML output, as CodeQL DOT output omits properties. + * @kind graph + * @id cpp/print-cbom-graph + */ + +import experimental.Quantum.Language + +query predicate nodes(Crypto::NodeBase node, string key, string value) { + Crypto::nodes_graph_impl(node, key, value) +} + +query predicate edges(Crypto::NodeBase source, Crypto::NodeBase target, string key, string value) { + Crypto::edges_graph_impl(source, target, key, value) +} + +query predicate graphProperties(string key, string value) { + key = "semmle.graphKind" and value = "graph" +} diff --git a/cpp/ql/src/experimental/Quantum/Test2.ql b/cpp/ql/src/experimental/Quantum/Test2.ql new file mode 100644 index 000000000000..3f48f156a430 --- /dev/null +++ b/cpp/ql/src/experimental/Quantum/Test2.ql @@ -0,0 +1,8 @@ +/** + * @name "PQC Test" + */ + +import experimental.Quantum.Language + +from Crypto::NodeBase node +select node diff --git a/java/ql/lib/experimental/Quantum/JCA.qll b/java/ql/lib/experimental/Quantum/JCA.qll new file mode 100644 index 000000000000..3e2d6b53d944 --- /dev/null +++ b/java/ql/lib/experimental/Quantum/JCA.qll @@ -0,0 +1,562 @@ +import java +import semmle.code.java.dataflow.DataFlow +import semmle.code.java.controlflow.Dominance + +module JCAModel { + import Language + + // TODO: Verify that the PBEWith% case works correctly + bindingset[algo] + predicate cipher_names(string algo) { + algo.toUpperCase() + .matches([ + "AES", "AESWrap", "AESWrapPad", "ARCFOUR", "Blowfish", "ChaCha20", "ChaCha20-Poly1305", + "DES", "DESede", "DESedeWrap", "ECIES", "PBEWith%", "RC2", "RC4", "RC5", "RSA" + ].toUpperCase()) + } + + // TODO: Verify that the CFB% case works correctly + bindingset[mode] + predicate cipher_modes(string mode) { + mode.toUpperCase() + .matches([ + "NONE", "CBC", "CCM", "CFB", "CFB%", "CTR", "CTS", "ECB", "GCM", "KW", "KWP", "OFB", + "OFB%", "PCBC" + ].toUpperCase()) + } + + // TODO: Verify that the OAEPWith% case works correctly + bindingset[padding] + predicate cipher_padding(string padding) { + padding + .toUpperCase() + .matches([ + "NoPadding", "ISO10126Padding", "OAEPPadding", "OAEPWith%", "PKCS1Padding", + "PKCS5Padding", "SSL3Padding" + ].toUpperCase()) + } + + /** + * A `StringLiteral` in the `"ALG/MODE/PADDING"` or `"ALG"` format + */ + class CipherStringLiteral extends StringLiteral { + CipherStringLiteral() { cipher_names(this.getValue().splitAt("/")) } + + string getAlgorithmName() { result = this.getValue().splitAt("/", 0) } + + string getMode() { result = this.getValue().splitAt("/", 1) } + + string getPadding() { result = this.getValue().splitAt("/", 2) } + } + + class CipherGetInstanceCall extends Call { + CipherGetInstanceCall() { + this.getCallee().hasQualifiedName("javax.crypto", "Cipher", "getInstance") + } + + Expr getAlgorithmArg() { result = this.getArgument(0) } + + Expr getProviderArg() { result = this.getArgument(1) } + } + + private class JCACipherOperationCall extends Call { + JCACipherOperationCall() { + exists(string s | s in ["doFinal", "wrap", "unwrap"] | + this.getCallee().hasQualifiedName("javax.crypto", "Cipher", s) + ) + } + + DataFlow::Node getInputData() { result.asExpr() = this.getArgument(0) } + } + + /** + * Data-flow configuration modelling flow from a cipher string literal to a `CipherGetInstanceCall` argument. + */ + private module AlgorithmStringToFetchConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node src) { src.asExpr() instanceof CipherStringLiteral } + + predicate isSink(DataFlow::Node sink) { + exists(CipherGetInstanceCall call | sink.asExpr() = call.getAlgorithmArg()) + } + } + + module AlgorithmStringToFetchFlow = DataFlow::Global; + + /** + * The cipher algorithm argument to a `CipherGetInstanceCall`. + * + * For example, in `Cipher.getInstance(algorithm)`, this class represents `algorithm`. + */ + class CipherGetInstanceAlgorithmArg extends Crypto::CipherAlgorithmInstance, + Crypto::BlockCipherModeOfOperationAlgorithmInstance, Crypto::PaddingAlgorithmInstance instanceof Expr + { + CipherGetInstanceCall call; + + CipherGetInstanceAlgorithmArg() { this = call.getAlgorithmArg() } + + /** + * Returns the `StringLiteral` from which this argument is derived, if known. + */ + CipherStringLiteral getOrigin() { + AlgorithmStringToFetchFlow::flow(DataFlow::exprNode(result), + DataFlow::exprNode(this.(Expr).getAChildExpr*())) + } + + CipherGetInstanceCall getCall() { result = call } + } + + /** + * An access to the `javax.crypto.Cipher` class. + */ + private class CipherAccess extends TypeAccess { + CipherAccess() { this.getType().(Class).hasQualifiedName("javax.crypto", "Cipher") } + } + + /** + * An access to a cipher mode field of the `javax.crypto.Cipher` class, + * specifically `ENCRYPT_MODE`, `DECRYPT_MODE`, `WRAP_MODE`, or `UNWRAP_MODE`. + */ + private class JavaxCryptoCipherOperationModeAccess extends FieldAccess { + JavaxCryptoCipherOperationModeAccess() { + this.getQualifier() instanceof CipherAccess and + this.getField().getName().toUpperCase() in [ + "ENCRYPT_MODE", "DECRYPT_MODE", "WRAP_MODE", "UNWRAP_MODE" + ] + } + } + + class CipherUpdateCall extends MethodCall { + CipherUpdateCall() { this.getMethod().hasQualifiedName("javax.crypto", "Cipher", "update") } + + DataFlow::Node getInputData() { result.asExpr() = this.getArgument(0) } + } + + private newtype TCipherModeFlowState = + TUninitializedCipherModeFlowState() or + TInitializedCipherModeFlowState(CipherInitCall call) or + TUsedCipherModeFlowState(CipherInitCall init, CipherUpdateCall update) + + abstract private class CipherModeFlowState extends TCipherModeFlowState { + string toString() { + this = TUninitializedCipherModeFlowState() and result = "uninitialized" + or + this = TInitializedCipherModeFlowState(_) and result = "initialized" + } + + abstract Crypto::CipherOperationSubtype getCipherOperationMode(); + } + + private class UninitializedCipherModeFlowState extends CipherModeFlowState, + TUninitializedCipherModeFlowState + { + override Crypto::CipherOperationSubtype getCipherOperationMode() { + result instanceof Crypto::UnknownCipherOperationMode + } + } + + private class InitializedCipherModeFlowState extends CipherModeFlowState, + TInitializedCipherModeFlowState + { + CipherInitCall call; + DataFlow::Node node1; + DataFlow::Node node2; + Crypto::CipherOperationSubtype mode; + + InitializedCipherModeFlowState() { + this = TInitializedCipherModeFlowState(call) and + DataFlow::localFlowStep(node1, node2) and + node2.asExpr() = call.getQualifier() and + // TODO: does this make this predicate inefficient as it binds with anything? + not node1.asExpr() = call.getQualifier() and + mode = call.getCipherOperationModeType() + } + + CipherInitCall getInitCall() { result = call } + + DataFlow::Node getFstNode() { result = node1 } + + /** + * Returns the node *to* which the state-changing step occurs + */ + DataFlow::Node getSndNode() { result = node2 } + + override Crypto::CipherOperationSubtype getCipherOperationMode() { result = mode } + } + + /** + * Trace from cipher initialization to a cryptographic operation, + * specifically `Cipher.doFinal()`, `Cipher.wrap()`, or `Cipher.unwrap()`. + * + * TODO: handle `Cipher.update()` + */ + private module CipherGetInstanceToCipherOperationConfig implements DataFlow::StateConfigSig { + class FlowState = TCipherModeFlowState; + + predicate isSource(DataFlow::Node src, FlowState state) { + state instanceof UninitializedCipherModeFlowState and + src.asExpr() instanceof CipherGetInstanceCall + } + + predicate isSink(DataFlow::Node sink, FlowState state) { none() } + + predicate isSink(DataFlow::Node sink) { + exists(JCACipherOperationCall c | c.getQualifier() = sink.asExpr()) + } + + predicate isAdditionalFlowStep( + DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2 + ) { + node1 = state2.(InitializedCipherModeFlowState).getFstNode() and + node2 = state2.(InitializedCipherModeFlowState).getSndNode() + } + + predicate isBarrier(DataFlow::Node node, FlowState state) { + exists(CipherInitCall call | node.asExpr() = call.getQualifier() | + state instanceof UninitializedCipherModeFlowState + or + state.(InitializedCipherModeFlowState).getInitCall() != call + ) + } + } + + module CipherGetInstanceToCipherOperationFlow = + DataFlow::GlobalWithState; + + class CipherOperationInstance extends Crypto::CipherOperationInstance instanceof Call { + Crypto::CipherOperationSubtype mode; + Crypto::CipherAlgorithmInstance algorithm; + CipherGetInstanceToCipherOperationFlow::PathNode sink; + JCACipherOperationCall doFinalize; + + CipherOperationInstance() { + exists( + CipherGetInstanceToCipherOperationFlow::PathNode src, CipherGetInstanceCall getCipher, + CipherGetInstanceAlgorithmArg arg + | + CipherGetInstanceToCipherOperationFlow::flowPath(src, sink) and + src.getNode().asExpr() = getCipher and + sink.getNode().asExpr() = doFinalize.getQualifier() and + sink.getState().(CipherModeFlowState).getCipherOperationMode() = mode and + this = doFinalize and + arg.getCall() = getCipher and + algorithm = arg + ) + } + + override Crypto::CipherAlgorithmInstance getAlgorithm() { result = algorithm } + + override Crypto::CipherOperationSubtype getCipherOperationSubtype() { result = mode } + + override Crypto::NonceArtifactInstance getNonce() { + NonceArtifactToCipherInitCallFlow::flow(result.asOutputData(), + DataFlow::exprNode(sink.getState() + .(InitializedCipherModeFlowState) + .getInitCall() + .getNonceArg())) + } + + override DataFlow::Node getInputData() { result = doFinalize.getInputData() } + } + + /** + * A block cipher mode of operation, where the mode is specified in the ALG or ALG/MODE/PADDING format. + * + * This class will only exist when the mode (*and its type*) is determinable. + * This is because the mode will always be specified alongside the algorithm and never independently. + * Therefore, we can always assume that a determinable algorithm will have a determinable mode. + * + * In the case that only an algorithm is specified, e.g., "AES", the provider provides a default mode. + * + * TODO: Model the case of relying on a provider default, but alert on it as a bad practice. + */ + class ModeOfOperation extends Crypto::ModeOfOperationAlgorithm { + CipherGetInstanceAlgorithmArg instance; + + ModeOfOperation() { + this = Crypto::TBlockCipherModeOfOperationAlgorithm(instance) and + // TODO: this currently only holds for explicitly defined modes in a string literal. + // Cases with defaults, e.g., "AES", are not yet modelled. + // For these cases, in a CBOM, the AES node would have an unknown edge to its mode child. + exists(instance.getOrigin().getMode()) + } + + override Location getLocation() { result = instance.getLocation() } + + // In this case, the raw name is still only the /MODE/ part. + // TODO: handle defaults + override string getRawAlgorithmName() { result = instance.getOrigin().getMode() } + + private predicate modeToNameMappingKnown(Crypto::TBlockCipherModeOperationType type, string name) { + type instanceof Crypto::ECB and name = "ECB" + or + type instanceof Crypto::CBC and name = "CBC" + or + type instanceof Crypto::GCM and name = "GCM" + or + type instanceof Crypto::CTR and name = "CTR" + or + type instanceof Crypto::XTS and name = "XTS" + or + type instanceof Crypto::CCM and name = "CCM" + or + type instanceof Crypto::SIV and name = "SIV" + or + type instanceof Crypto::OCB and name = "OCB" + } + + override Crypto::TBlockCipherModeOperationType getModeType() { + if this.modeToNameMappingKnown(_, instance.getOrigin().getMode()) + then this.modeToNameMappingKnown(result, instance.getOrigin().getMode()) + else result instanceof Crypto::OtherMode + } + + CipherStringLiteral getInstance() { result = instance } + } + + class PaddingAlgorithm extends Crypto::PaddingAlgorithm { + CipherGetInstanceAlgorithmArg instance; + + PaddingAlgorithm() { + this = Crypto::TPaddingAlgorithm(instance) and + exists(instance.getOrigin().getPadding()) + } + + override Location getLocation() { result = instance.getLocation() } + + override string getRawAlgorithmName() { result = instance.getOrigin().getPadding() } + + bindingset[name] + private predicate paddingToNameMappingKnown(Crypto::TPaddingType type, string name) { + type instanceof Crypto::NoPadding and name = "NOPADDING" + or + type instanceof Crypto::PKCS7 and name = ["PKCS5Padding", "PKCS7Padding"] // TODO: misnomer in the JCA? + or + type instanceof Crypto::OAEP and name.matches("OAEP%") // TODO: handle OAEPWith% + } + + override Crypto::TPaddingType getPaddingType() { + if this.paddingToNameMappingKnown(_, instance.getOrigin().getPadding()) + then this.paddingToNameMappingKnown(result, instance.getOrigin().getPadding()) + else result instanceof Crypto::OtherPadding + } + + CipherStringLiteral getInstance() { result = instance } + } + + class EncryptionAlgorithm extends Crypto::CipherAlgorithm { + CipherStringLiteral origin; + CipherGetInstanceAlgorithmArg instance; + + EncryptionAlgorithm() { + this = Crypto::TCipherAlgorithm(instance) and + instance.getOrigin() = origin + } + + override Location getLocation() { result = instance.getLocation() } + + override Crypto::ModeOfOperationAlgorithm getModeOfOperation() { + result.(ModeOfOperation).getInstance() = origin + } + + override Crypto::PaddingAlgorithm getPadding() { + result.(PaddingAlgorithm).getInstance() = origin + } + + override Crypto::LocatableElement getOrigin(string name) { + result = origin and name = origin.toString() + } + + override string getRawAlgorithmName() { result = origin.getValue() } + + override Crypto::TCipherType getCipherFamily() { + if this.cipherNameMappingKnown(_, origin.getAlgorithmName()) + then this.cipherNameMappingKnown(result, origin.getAlgorithmName()) + else result instanceof Crypto::OtherCipherType + } + + override string getKeySize(Location location) { none() } + + bindingset[name] + private predicate cipherNameMappingKnown(Crypto::TCipherType type, string name) { + name = "AES" and + type instanceof Crypto::AES + or + name = "DES" and + type instanceof Crypto::DES + or + name = "TripleDES" and + type instanceof Crypto::TripleDES + or + name = "IDEA" and + type instanceof Crypto::IDEA + or + name = "CAST5" and + type instanceof Crypto::CAST5 + or + name = "ChaCha20" and + type instanceof Crypto::ChaCha20 + or + name = "RC4" and + type instanceof Crypto::RC4 + or + name = "RC5" and + type instanceof Crypto::RC5 + or + name = "RSA" and + type instanceof Crypto::RSA + } + } + + /** + * Initialization vectors and other nonce artifacts + */ + abstract class NonceParameterInstantiation extends Crypto::NonceArtifactInstance instanceof ClassInstanceExpr + { + override DataFlow::Node asOutputData() { result.asExpr() = this } + } + + class IvParameterSpecInstance extends NonceParameterInstantiation { + IvParameterSpecInstance() { + this.(ClassInstanceExpr) + .getConstructedType() + .hasQualifiedName("javax.crypto.spec", "IvParameterSpec") + } + + override DataFlow::Node getInput() { result.asExpr() = this.(ClassInstanceExpr).getArgument(0) } + } + + // TODO: this also specifies the tag length for GCM + class GCMParameterSpecInstance extends NonceParameterInstantiation { + GCMParameterSpecInstance() { + this.(ClassInstanceExpr) + .getConstructedType() + .hasQualifiedName("javax.crypto.spec", "GCMParameterSpec") + } + + override DataFlow::Node getInput() { result.asExpr() = this.(ClassInstanceExpr).getArgument(1) } + } + + class IvParameterSpecGetIvCall extends MethodCall { + IvParameterSpecGetIvCall() { + this.getMethod().hasQualifiedName("javax.crypto.spec", "IvParameterSpec", "getIV") + } + } + + module NonceArtifactToCipherInitCallConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node src) { + exists(NonceParameterInstantiation n | + src = n.asOutputData() and + not exists(IvParameterSpecGetIvCall m | n.getInput().asExpr() = m) + ) + } + + predicate isSink(DataFlow::Node sink) { + exists(CipherInitCall c | c.getNonceArg() = sink.asExpr()) + } + + predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + exists(IvParameterSpecGetIvCall m | + node1.asExpr() = m.getQualifier() and + node2.asExpr() = m + ) + or + exists(NonceParameterInstantiation n | + node1 = n.getInput() and + node2.asExpr() = n + ) + } + } + + module NonceArtifactToCipherInitCallFlow = DataFlow::Global; + + /** + * A data-flow configuration to track flow from a mode field access to + * the mode argument of the `init` method of the `javax.crypto.Cipher` class. + */ + private module JavaxCipherModeAccessToInitConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node src) { + src.asExpr() instanceof JavaxCryptoCipherOperationModeAccess + } + + predicate isSink(DataFlow::Node sink) { + exists(CipherInitCall c | c.getModeArg() = sink.asExpr()) + } + } + + module JavaxCipherModeAccessToInitFlow = DataFlow::Global; + + private predicate cipher_mode_str_to_cipher_mode_known( + string mode, Crypto::CipherOperationSubtype cipher_mode + ) { + mode = "ENCRYPT_MODE" and cipher_mode instanceof Crypto::EncryptionMode + or + mode = "WRAP_MODE" and cipher_mode instanceof Crypto::WrapMode + or + mode = "DECRYPT_MODE" and cipher_mode instanceof Crypto::DecryptionMode + or + mode = "UNWRAP_MODE" and cipher_mode instanceof Crypto::UnwrapMode + } + + class CipherInitCall extends MethodCall { + CipherInitCall() { this.getCallee().hasQualifiedName("javax.crypto", "Cipher", "init") } + + /** + * Returns the mode argument to the `init` method + * that is used to determine the cipher operation mode. + * Note this is the raw expr and not necessarily a direct access + * of a mode. Use `getModeOrigin()` to get the field access origin + * flowing to this argument, if one exists (is known). + */ + Expr getModeArg() { result = this.getArgument(0) } + + JavaxCryptoCipherOperationModeAccess getModeOrigin() { + exists(DataFlow::Node src, DataFlow::Node sink | + JavaxCipherModeAccessToInitFlow::flow(src, sink) and + src.asExpr() = result and + this.getModeArg() = sink.asExpr() + ) + } + + Crypto::CipherOperationSubtype getCipherOperationModeType() { + if cipher_mode_str_to_cipher_mode_known(this.getModeOrigin().getField().getName(), _) + then cipher_mode_str_to_cipher_mode_known(this.getModeOrigin().getField().getName(), result) + else result instanceof Crypto::UnknownCipherOperationMode + } + + Expr getKeyArg() { + result = this.getArgument(1) and this.getMethod().getParameterType(1).hasName("Key") + } + + Expr getNonceArg() { + result = this.getArgument(2) and + this.getMethod().getParameterType(2).hasName("AlgorithmParameterSpec") + } + } + + /** + * Key Material Concept + * any class that implements `java.security.spec.KeySpec` + */ + class KeyMaterialObject extends Class { + KeyMaterialObject() { + exists(RefType t | + this.extendsOrImplements*(t) and + t.hasQualifiedName("java.security.spec", "KeySpec") + ) + } + } + + /** + * KeyMaterial + * ie some plain material that gets used to generate a Key + */ + class KeyMaterialInstantiation extends Crypto::KeyMaterialInstance instanceof ClassInstanceExpr { + KeyMaterialInstantiation() { + this.(ClassInstanceExpr).getConstructedType() instanceof KeyMaterialObject + } + + override DataFlow::Node asOutputData() { result.asExpr() = this } + + override DataFlow::Node getInput() { result.asExpr() = this.(ClassInstanceExpr).getArgument(0) } + } +} diff --git a/java/ql/lib/experimental/Quantum/Language.qll b/java/ql/lib/experimental/Quantum/Language.qll new file mode 100644 index 000000000000..e88bce6d9959 --- /dev/null +++ b/java/ql/lib/experimental/Quantum/Language.qll @@ -0,0 +1,54 @@ +private import codeql.cryptography.Model +private import java as Language +private import semmle.code.java.security.InsecureRandomnessQuery +private import semmle.code.java.security.RandomQuery + +private class UnknownLocation extends Language::Location { + UnknownLocation() { this.getFile().getAbsolutePath() = "" } +} + +/** + * A dummy location which is used when something doesn't have a location in + * the source code but needs to have a `Location` associated with it. There + * may be several distinct kinds of unknown locations. For example: one for + * expressions, one for statements and one for other program elements. + */ +private class UnknownDefaultLocation extends UnknownLocation { + UnknownDefaultLocation() { locations_default(this, _, 0, 0, 0, 0) } +} + +module CryptoInput implements InputSig { + class DataFlowNode = DataFlow::Node; + + class LocatableElement = Language::Element; + + class UnknownLocation = UnknownDefaultLocation; +} + +/** + * Instantiate the model + */ +module Crypto = CryptographyBase; + +/** + * Random number generation, where each instance is modelled as the expression + * tied to an output node (i.e., the result of the source of randomness) + */ +abstract class RandomnessInstance extends Crypto::RandomNumberGenerationInstance { + override DataFlow::Node asOutputData() { result.asExpr() = this } +} + +class SecureRandomnessInstance extends RandomnessInstance { + SecureRandomnessInstance() { + exists(RandomDataSource s | this = s.getOutput() | + s.getSourceOfRandomness() instanceof SecureRandomNumberGenerator + ) + } +} + +class InsecureRandomnessInstance extends RandomnessInstance { + InsecureRandomnessInstance() { exists(InsecureRandomnessSource node | this = node.asExpr()) } +} + +// Import library-specific modeling +import JCA diff --git a/java/ql/lib/qlpack.yml b/java/ql/lib/qlpack.yml index 18b74a919dd2..ae7f13ad7ab3 100644 --- a/java/ql/lib/qlpack.yml +++ b/java/ql/lib/qlpack.yml @@ -6,6 +6,7 @@ extractor: java library: true upgrades: upgrades dependencies: + codeql/cryptography: ${workspace} codeql/dataflow: ${workspace} codeql/mad: ${workspace} codeql/rangeanalysis: ${workspace} diff --git a/java/ql/src/experimental/Quantum/BrokenCrypto.ql b/java/ql/src/experimental/Quantum/BrokenCrypto.ql new file mode 100644 index 000000000000..020ac1b8925a --- /dev/null +++ b/java/ql/src/experimental/Quantum/BrokenCrypto.ql @@ -0,0 +1,77 @@ +/** +* @name Use of a broken or risky cryptographic algorithm +* @description Using broken or weak cryptographic algorithms can allow an attacker to compromise security. +* @kind problem +* @problem.severity warning +* @security-severity 7.5 +* @precision high +* @id java/weak-cryptographic-algorithm-new-model +* @tags security +* external/cwe/cwe-327 +* external/cwe/cwe-328 +*/ + + + +//THIS QUERY IS A REPLICA OF: https://github.com/github/codeql/blob/main/java/ql/src/Security/CWE/CWE-327/BrokenCryptoAlgorithm.ql +//but uses the **NEW MODELLING** +import experimental.Quantum.Language + + +/** + * Gets the name of an algorithm that is known to be insecure. + */ +string getAnInsecureAlgorithmName() { + result = + [ + "DES", "RC2", "RC4", "RC5", + // ARCFOUR is a variant of RC4 + "ARCFOUR", + // Encryption mode ECB like AES/ECB/NoPadding is vulnerable to replay and other attacks + "ECB", + // CBC mode of operation with PKCS#5 or PKCS#7 padding is vulnerable to padding oracle attacks + "AES/CBC/PKCS[57]Padding" + ] + } + + private string rankedInsecureAlgorithm(int i) { + result = rank[i](string s | s = getAnInsecureAlgorithmName()) + } + + private string insecureAlgorithmString(int i) { + i = 1 and result = rankedInsecureAlgorithm(i) + or + result = rankedInsecureAlgorithm(i) + "|" + insecureAlgorithmString(i - 1) + } + + /** + * Gets the regular expression used for matching strings that look like they + * contain an algorithm that is known to be insecure. + */ + string getInsecureAlgorithmRegex() { + result = algorithmRegex(insecureAlgorithmString(max(int i | exists(rankedInsecureAlgorithm(i))))) + } + + bindingset[algorithmString] +private string algorithmRegex(string algorithmString) { + // Algorithms usually appear in names surrounded by characters that are not + // alphabetical characters in the same case. This handles the upper and lower + // case cases. + result = + "((^|.*[^A-Z])(" + algorithmString + ")([^A-Z].*|$))" + + // or... + "|" + + // For lowercase, we want to be careful to avoid being confused by camelCase + // hence we require two preceding uppercase letters to be sure of a case switch, + // or a preceding non-alphabetic character + "((^|.*[A-Z]{2}|.*[^a-zA-Z])(" + algorithmString.toLowerCase() + ")([^a-z].*|$))" +} + +from Crypto::Algorithm alg +where alg.getAlgorithmName().regexpMatch(getInsecureAlgorithmRegex()) and +// Exclude RSA/ECB/.* ciphers. +not alg.getAlgorithmName().regexpMatch("RSA/ECB.*") and +// Exclude German and French sentences. +not alg.getAlgorithmName().regexpMatch(".*\\p{IsLowercase} des \\p{IsLetter}.*") +select alg, "Cryptographic algorithm $@ is weak and should not be used.", alg, +alg.getAlgorithmName() diff --git a/java/ql/src/experimental/Quantum/ConstantPassword.ql b/java/ql/src/experimental/Quantum/ConstantPassword.ql new file mode 100644 index 000000000000..4d895ac8c0f8 --- /dev/null +++ b/java/ql/src/experimental/Quantum/ConstantPassword.ql @@ -0,0 +1,38 @@ +/** + * @name Constant password + * @description Using constant passwords is not secure, because potential attackers can easily recover them from the source code. + * @kind problem + * @problem.severity error + * @security-severity 6.8 + * @precision high + * @id java/constant-password-new-model + * @tags security + * external/cwe/cwe-259 + */ + +//this query is a replica of the concept in: https://github.com/github/codeql/blob/main/swift/ql/src/queries/Security/CWE-259/ConstantPassword.ql +//but uses the **NEW MODELLING** +import experimental.Quantum.Language +import semmle.code.java.dataflow.TaintTracking + +/** + * A constant password is created through either a byte array or string literals. + */ +class ConstantPasswordSource extends Expr { + ConstantPasswordSource() { + this instanceof CharacterLiteral or + this instanceof StringLiteral + } +} + +module ConstantToKeyDerivationFlow implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { source.asExpr() instanceof ConstantPasswordSource } + + predicate isSink(DataFlow::Node sink) { any(Crypto::KeyMaterialInstance i).getInput() = sink } +} + +module ConstantToKeyDerivationFlowInit = TaintTracking::Global; + +from DataFlow::Node source, DataFlow::Node sink +where ConstantToKeyDerivationFlowInit::flow(source, sink) +select sink, "Constant password $@ is used.", source, source.toString() diff --git a/java/ql/src/experimental/Quantum/PrintCBOMGraph.ql b/java/ql/src/experimental/Quantum/PrintCBOMGraph.ql new file mode 100644 index 000000000000..063cda564b60 --- /dev/null +++ b/java/ql/src/experimental/Quantum/PrintCBOMGraph.ql @@ -0,0 +1,21 @@ +/** + * @name Print CBOM Graph + * @description Outputs a graph representation of the cryptographic bill of materials. + * This query only supports DGML output, as CodeQL DOT output omits properties. + * @kind graph + * @id java/print-cbom-graph + */ + +import experimental.Quantum.Language + +query predicate nodes(Crypto::NodeBase node, string key, string value) { + Crypto::nodes_graph_impl(node, key, value) +} + +query predicate edges(Crypto::NodeBase source, Crypto::NodeBase target, string key, string value) { + Crypto::edges_graph_impl(source, target, key, value) +} + +query predicate graphProperties(string key, string value) { + key = "semmle.graphKind" and value = "graph" +} diff --git a/java/ql/src/experimental/Quantum/ReusedNonce.ql b/java/ql/src/experimental/Quantum/ReusedNonce.ql new file mode 100644 index 000000000000..cb0d6ff9d610 --- /dev/null +++ b/java/ql/src/experimental/Quantum/ReusedNonce.ql @@ -0,0 +1,23 @@ +/** + * @name Unsafe nonce source or reuse + * @id java/unsafe-nonce-source-or-reuse + */ + +import experimental.Quantum.Language +import semmle.code.java.dataflow.DataFlow + +Element getNonceOrigin(Crypto::NonceArtifactInstance nonce) { + // TODO: this check is currently ultra hacky just for demoing + result = nonce.getInput().asExpr().(VarAccess).getVariable() +} + +from + Crypto::CipherOperationInstance op, Crypto::NonceArtifactInstance nonce1, + Crypto::NonceArtifactInstance nonce2 +where + op.(Expr).getEnclosingCallable().getName() = "encrypt" and + nonce1 = op.getNonce() and + nonce2 = op.getNonce() and + not nonce1 = nonce2 and + getNonceOrigin(nonce1) = getNonceOrigin(nonce2) +select op, nonce1, nonce2 diff --git a/java/ql/src/experimental/Quantum/TestCipher.ql b/java/ql/src/experimental/Quantum/TestCipher.ql new file mode 100644 index 000000000000..6be308c25606 --- /dev/null +++ b/java/ql/src/experimental/Quantum/TestCipher.ql @@ -0,0 +1,16 @@ +/** + * @name "PQC Test" + */ + +import experimental.Quantum.Language + +from + Crypto::CipherOperation op, Crypto::CipherAlgorithm a, Crypto::ModeOfOperationAlgorithm m, + Crypto::PaddingAlgorithm p, Crypto::Nonce nonce +where + a = op.getAlgorithm() and + m = a.getModeOfOperation() and + p = a.getPadding() and + nonce = op.getNonce() +select op, op.getCipherOperationMode(), a, a.getRawAlgorithmName(), m, m.getRawAlgorithmName(), p, + p.getRawAlgorithmName(), nonce, nonce.getInputData() diff --git a/misc/scripts/cryptography/cbom.sh b/misc/scripts/cryptography/cbom.sh new file mode 100755 index 000000000000..ef558ccf432e --- /dev/null +++ b/misc/scripts/cryptography/cbom.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +CODEQL_PATH="/Users/nicolaswill/.local/share/gh/extensions/gh-codeql/dist/release/v2.20.4/codeql" +DATABASE_PATH="/Users/nicolaswill/pqc/gpt-crypto-test-cases/gpt_ai_gen_jca_test_cases_db" +QUERY_FILE="/Users/nicolaswill/pqc/codeql/java/ql/src/experimental/Quantum/PrintCBOMGraph.ql" +OUTPUT_DIR="graph_output" + +python3 generate_cbom.py -c "$CODEQL_PATH" -d "$DATABASE_PATH" -q "$QUERY_FILE" -o "$OUTPUT_DIR" diff --git a/misc/scripts/cryptography/generate_cbom.py b/misc/scripts/cryptography/generate_cbom.py new file mode 100644 index 000000000000..07400d6cd6a3 --- /dev/null +++ b/misc/scripts/cryptography/generate_cbom.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 + +import os +import sys +import argparse +import subprocess +import xml.etree.ElementTree as ET + +def run_codeql_analysis(codeql_path, database_path, query_path, output_dir): + """Runs the CodeQL analysis and generates a DGML file.""" + os.makedirs(output_dir, exist_ok=True) + command = [ + codeql_path, "database", "analyze", database_path, query_path, + "--rerun", "--format=dgml", "--output", output_dir + ] + + print(f"Running CodeQL analysis: {' '.join(command)}") + result = subprocess.run(command, capture_output=True, text=True) + + if result.returncode == 0: + print("Analysis completed successfully.") + else: + print("Analysis failed.") + print(result.stderr) + sys.exit(1) + + return result.returncode + + +def convert_dgml_to_dot(dgml_file, dot_file): + """Converts the DGML file to DOT format using the exact original implementation.""" + print(f"Processing DGML file: {dgml_file}") + + # Read source DGML + with open(dgml_file, "r", encoding="utf-8") as f: + xml_content = f.read() + + root = ET.fromstring(xml_content) + + # Form dot element sequence + body_l = ["digraph cbom {", + "node [shape=box];", + "rankdir=LR;" + ] + + # Process nodes + for node in root.find("{http://schemas.microsoft.com/vs/2009/dgml}Nodes"): + att = node.attrib + node_id = att['Id'] + label_parts = [] + for key, value in att.items(): + if key == 'Id': + continue + elif key == 'Label': + label_parts.append(value) + else: + label_parts.append(f"{key}={value}") + label = "\\n".join(label_parts) + # Escape forward slashes and double quotes + label = label.replace("/", "\\/") + label = label.replace("\"", "\\\"") + prop_l = [f'label="{label}"'] + node_s = f'nd_{node_id} [{", ".join(prop_l)}];' + body_l.append(node_s) + + # Process edges + for edge in root.find("{http://schemas.microsoft.com/vs/2009/dgml}Links"): + att = edge.attrib + edge_label = att.get("Label", "") + edge_label = edge_label.replace("/", "\\/") + edge_label = edge_label.replace("\"", "\\\"") + edge_s = 'nd_{} -> nd_{} [label="{}"];'.format( + att["Source"], att["Target"], edge_label) + body_l.append(edge_s) + + body_l.append("}") + + # Write DOT output + with open(dot_file, "w", encoding="utf-8") as f: + f.write("\n".join(body_l)) + + print(f"DGML file successfully converted to DOT format: {dot_file}") + + +def main(): + parser = argparse.ArgumentParser(description="Run CodeQL analysis and convert DGML to DOT.") + parser.add_argument("-c", "--codeql", required=True, help="Path to CodeQL CLI executable.") + parser.add_argument("-d", "--database", required=True, help="Path to the CodeQL database.") + parser.add_argument("-q", "--query", required=True, help="Path to the .ql query file.") + parser.add_argument("-o", "--output", required=True, help="Output directory for analysis results.") + + args = parser.parse_args() + + # Run CodeQL analysis + run_codeql_analysis(args.codeql, args.database, args.query, args.output) + + # Locate DGML file + dgml_file = os.path.join(args.output, "java", "print-cbom-graph.dgml") + dot_file = dgml_file.replace(".dgml", ".dot") + + if os.path.exists(dgml_file): + # Convert DGML to DOT + convert_dgml_to_dot(dgml_file, dot_file) + else: + print(f"No DGML file found in {args.output}.") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/python/ql/lib/experimental/Quantum/Base.qll b/python/ql/lib/experimental/Quantum/Base.qll new file mode 100644 index 000000000000..9be24ca6efaa --- /dev/null +++ b/python/ql/lib/experimental/Quantum/Base.qll @@ -0,0 +1,250 @@ +/** + * A language-independent library for reasoning about cryptography. + */ + +import codeql.util.Location +import codeql.util.Option + +signature module InputSig { + class LocatableElement { + Location getLocation(); + } + + class UnknownLocation instanceof Location; +} + +module CryptographyBase Input> { + final class LocatableElement = Input::LocatableElement; + + final class UnknownLocation = Input::UnknownLocation; + + final class UnknownPropertyValue extends string { + UnknownPropertyValue() { this = "" } + } + + abstract class NodeBase instanceof LocatableElement { + /** + * Returns a string representation of this node, usually the name of the operation/algorithm/property. + */ + abstract string toString(); + + /** + * Returns the location of this node in the code. + */ + Location getLocation() { result = super.getLocation() } + + /** + * Gets the origin of this node, e.g., a string literal in source describing it. + */ + LocatableElement getOrigin(string value) { none() } + + /** + * Returns the child of this node with the given edge name. + * + * This predicate is used by derived classes to construct the graph of cryptographic operations. + */ + NodeBase getChild(string edgeName) { none() } + + /** + * Defines properties of this node by name and either a value or location or both. + * + * This predicate is used by derived classes to construct the graph of cryptographic operations. + */ + predicate properties(string key, string value, Location location) { + key = "origin" and location = this.getOrigin(value).getLocation() + } + + /** + * Returns the parent of this node. + */ + final NodeBase getAParent() { result.getChild(_) = this } + } + + class Asset = NodeBase; + + /** + * A cryptographic operation, such as hashing or encryption. + */ + abstract class Operation extends Asset { + /** + * Gets the algorithm associated with this operation. + */ + abstract Algorithm getAlgorithm(); + + /** + * Gets the name of this operation, e.g., "hash" or "encrypt". + */ + abstract string getOperationName(); + + final override string toString() { result = this.getOperationName() } + + override NodeBase getChild(string edgeName) { + result = super.getChild(edgeName) + or + edgeName = "uses" and + if exists(this.getAlgorithm()) then result = this.getAlgorithm() else result = this + } + } + + abstract class Algorithm extends Asset { + /** + * Gets the name of this algorithm, e.g., "AES" or "SHA". + */ + abstract string getAlgorithmName(); + + /** + * Gets the raw name of this algorithm from source (no parsing or formatting) + */ + abstract string getRawAlgorithmName(); + + final override string toString() { result = this.getAlgorithmName() } + } + + /** + * A hashing operation that processes data to generate a hash value. + * This operation takes an input message of arbitrary content and length and produces a fixed-size + * hash value as the output using a specified hashing algorithm. + */ + abstract class HashOperation extends Operation { + abstract override HashAlgorithm getAlgorithm(); + + override string getOperationName() { result = "HASH" } + } + + // Rule: no newtype representing a type of algorithm should be modelled with multiple interfaces + // + // Example: HKDF and PKCS12KDF are both key derivation algorithms. + // However, PKCS12KDF also has a property: the iteration count. + // + // If we have HKDF and PKCS12KDF under TKeyDerivationType, + // someone modelling a library might try to make a generic identification of both of those algorithms. + // + // They will therefore not use the specialized type for PKCS12KDF, + // meaning "from PKCS12KDF algo select algo" will have no results. + // + newtype THashType = + // We're saying by this that all of these have an identical interface / properties / edges + MD5() or + SHA1() or + SHA256() or + SHA512() or + OtherHashType() + + /** + * A hashing algorithm that transforms variable-length input into a fixed-size hash value. + */ + abstract class HashAlgorithm extends Algorithm { + final predicate hashTypeToNameMapping(THashType type, string name) { + type instanceof MD5 and name = "MD5" + or + type instanceof SHA1 and name = "SHA-1" + or + type instanceof SHA256 and name = "SHA-256" + or + type instanceof SHA512 and name = "SHA-512" + or + type instanceof OtherHashType and name = this.getRawAlgorithmName() + } + + abstract THashType getHashType(); + + override string getAlgorithmName() { this.hashTypeToNameMapping(this.getHashType(), result) } + + } + + /** + * An operation that derives one or more keys from an input value. + */ + abstract class KeyDerivationOperation extends Operation { + override string getOperationName() { result = "KEY_DERIVATION" } + } + + /** + * An algorithm that derives one or more keys from an input value. + */ + abstract class KeyDerivationAlgorithm extends Algorithm { + abstract override string getAlgorithmName(); + } + + /** + * HKDF key derivation function + */ + abstract class HKDF extends KeyDerivationAlgorithm { + final override string getAlgorithmName() { result = "HKDF" } + + abstract HashAlgorithm getHashAlgorithm(); + + override NodeBase getChild(string edgeName) { + result = super.getChild(edgeName) + or + edgeName = "digest" and result = this.getHashAlgorithm() + } + } + + /** + * PKCS #12 key derivation function + */ + abstract class PKCS12KDF extends KeyDerivationAlgorithm { + final override string getAlgorithmName() { result = "PKCS12KDF" } + + abstract HashAlgorithm getHashAlgorithm(); + + override NodeBase getChild(string edgeName) { + result = super.getChild(edgeName) + or + edgeName = "digest" and result = this.getHashAlgorithm() + } + } + + newtype TEllipticCurveFamilyType = + // We're saying by this that all of these have an identical interface / properties / edges + NIST() or + SEC() or + NUMS() or + PRIME() or + BRAINPOOL() or + CURVE25519() or + CURVE448() or + C2() or + SM2() or + ES() or + OtherEllipticCurveFamilyType() + + + /** + * Elliptic curve algorithm + */ + abstract class EllipticCurve extends Algorithm { + + + abstract string getKeySize(Location location); + + abstract TEllipticCurveFamilyType getCurveFamilyType(); + + override predicate properties(string key, string value, Location location) { + super.properties(key, value, location) + or + key = "key_size" and + if exists(this.getKeySize(location)) + then value = this.getKeySize(location) + else ( + value instanceof UnknownPropertyValue and location instanceof UnknownLocation + ) + // other properties, like field type are possible, but not modeled until considered necessary + } + + override string getAlgorithmName() { result = this.getRawAlgorithmName().toUpperCase()} + + /** + * Mandating that for Elliptic Curves specifically, users are responsible + * for providing as the 'raw' name, the official name of the algorithm. + * Casing doesn't matter, we will enforce further naming restrictions on + * `getAlgorithmName` by default. + * Rationale: elliptic curve names can have a lot of variation in their components + * (e.g., "secp256r1" vs "P-256"), trying to produce generalized set of properties + * is possible to capture all cases, but such modeling is likely not necessary. + * if all properties need to be captured, we can reassess how names are generated. + */ + override abstract string getRawAlgorithmName(); + } +} diff --git a/python/ql/lib/experimental/Quantum/Language.qll b/python/ql/lib/experimental/Quantum/Language.qll new file mode 100644 index 000000000000..9abf3e7fa7e4 --- /dev/null +++ b/python/ql/lib/experimental/Quantum/Language.qll @@ -0,0 +1,12 @@ +private import Base +private import python as Lang + +module CryptoInput implements InputSig { + class LocatableElement = Lang::Expr; + + class UnknownLocation = Lang::UnknownDefaultLocation; +} + +module Crypto = CryptographyBase; + +import PycaCryptography diff --git a/python/ql/lib/experimental/Quantum/PycaCryptography.qll b/python/ql/lib/experimental/Quantum/PycaCryptography.qll new file mode 100644 index 000000000000..802ad23cf9d9 --- /dev/null +++ b/python/ql/lib/experimental/Quantum/PycaCryptography.qll @@ -0,0 +1,55 @@ +import python +import semmle.python.ApiGraphs + +module PycaCryptographyModule { + import Language + + /** + * Gets a predefined curve class constructor call from + * `cryptography.hazmat.primitives.asymmetric.ec` + * https://cryptography.io/en/latest/hazmat/primitives/asymmetric/ec/#elliptic-curves + */ + DataFlow::Node predefinedCurveClass(string rawName, string curveName, Crypto::TEllipticCurveFamilyType family, int keySize) { + // getACall since the typical case is to construct the curve with initialization values, + // not to pass the mode uninitialized + result = + API::moduleImport("cryptography") + .getMember("hazmat") + .getMember("primitives") + .getMember("asymmetric") + .getMember("ec") + .getMember(rawName) + .getACall() + and + curveName = rawName.toUpperCase() + and + curveName.matches("SEC%") and family instanceof Crypto::SEC + and + curveName.matches("BRAINPOOL%") and family instanceof Crypto::BRAINPOOL + and + // Enumerating all key sizes known in the API + // TODO: should we dynamically extract them through a regex? + keySize in [160, 163, 192, 224, 233, 256, 283, 320, 384, 409, 512, 571] + and + curveName.matches("%" + keySize + "%") + } + + + class EllipticCurve extends Crypto::EllipticCurve instanceof Expr{ + int keySize; + string rawName; + string curveName; + Crypto::TEllipticCurveFamilyType family; + EllipticCurve() { + this = predefinedCurveClass(rawName, curveName, family, keySize).asExpr() + } + + override string getRawAlgorithmName() { result = rawName } + override string getAlgorithmName() { result = curveName } + Crypto::TEllipticCurveFamilyType getFamily() { result = family } + + override string getKeySize(Location location) { + location = this and + result = keySize.toString() } + } +} diff --git a/python/ql/lib/semmle/python/Files.qll b/python/ql/lib/semmle/python/Files.qll index 2da0dd61f885..67f21ad0b249 100644 --- a/python/ql/lib/semmle/python/Files.qll +++ b/python/ql/lib/semmle/python/Files.qll @@ -368,3 +368,40 @@ class EncodingError extends SyntaxError { override string toString() { result = "Encoding Error" } } + + + +/** + * A dummy location which is used when something doesn't have a location in + * the source code but needs to have a `Location` associated with it. There + * may be several distinct kinds of unknown locations. For example: one for + * expressions, one for statements and one for other program elements. + */ +class UnknownLocation extends Location { + UnknownLocation() { this.getFile().getAbsolutePath() = "" } +} + +/** + * A dummy location which is used when something doesn't have a location in + * the source code but needs to have a `Location` associated with it. + */ +class UnknownDefaultLocation extends UnknownLocation { + UnknownDefaultLocation() { locations_default(this, _, 0, 0, 0, 0) } +} + +/** + * A dummy location which is used when an expression doesn't have a + * location in the source code but needs to have a `Location` associated + * with it. + */ +class UnknownExprLocation extends UnknownLocation { + UnknownExprLocation() { locations_default(this, _, 0, 0, 0, 0) } +} + +/** + * A dummy location which is used when a statement doesn't have a location + * in the source code but needs to have a `Location` associated with it. + */ +class UnknownStmtLocation extends UnknownLocation { + UnknownStmtLocation() { locations_default(this, _, 0, 0, 0, 0) } +} diff --git a/shared/cryptography/codeql/cryptography/Model.qll b/shared/cryptography/codeql/cryptography/Model.qll new file mode 100644 index 000000000000..1b96ba4cf3f4 --- /dev/null +++ b/shared/cryptography/codeql/cryptography/Model.qll @@ -0,0 +1,1025 @@ +/** + * A language-independent library for reasoning about cryptography. + */ + +import codeql.util.Location +import codeql.util.Option + +signature module InputSig { + class LocatableElement { + Location getLocation(); + + string toString(); + } + + class DataFlowNode { + Location getLocation(); + + string toString(); + } + + class UnknownLocation instanceof Location; +} + +module CryptographyBase Input> { + final class LocatableElement = Input::LocatableElement; + + final class UnknownLocation = Input::UnknownLocation; + + final class DataFlowNode = Input::DataFlowNode; + + final class UnknownPropertyValue extends string { + UnknownPropertyValue() { this = "" } + } + + private string getPropertyAsGraphString(NodeBase node, string key, Location root) { + result = + strictconcat(any(string value, Location location, string parsed | + node.properties(key, value, location) and + if location = root or location instanceof UnknownLocation + then parsed = value + else parsed = "(" + value + "," + location.toString() + ")" + | + parsed + ), "," + ) + } + + predicate nodes_graph_impl(NodeBase node, string key, string value) { + not ( + // exclude Artifact nodes with no edges to or from them + node instanceof Artifact and + not (edges_graph_impl(node, _, _, _) or edges_graph_impl(_, node, _, _)) + ) and + ( + key = "semmle.label" and + value = node.toString() + or + // CodeQL's DGML output does not include a location + key = "Location" and + value = node.getLocation().toString() + or + // Known unknown edges should be reported as properties rather than edges + node = node.getChild(key) and + value = "" + or + // Report properties + value = getPropertyAsGraphString(node, key, node.getLocation()) + ) + } + + predicate edges_graph_impl(NodeBase source, NodeBase target, string key, string value) { + key = "semmle.label" and + target = source.getChild(value) and + // Known unknowns are reported as properties rather than edges + not source = target + } + + /** + * All elements in the database that are mapped to nodes must extend the following classes + */ + abstract class HashOperationInstance extends LocatableElement { } + + abstract class HashAlgorithmInstance extends LocatableElement { } + + abstract class KeyDerivationOperationInstance extends LocatableElement { } + + abstract class KeyDerivationAlgorithmInstance extends LocatableElement { } + + abstract class CipherOperationInstance extends LocatableElement { + abstract CipherAlgorithmInstance getAlgorithm(); + + abstract CipherOperationSubtype getCipherOperationSubtype(); + + abstract NonceArtifactInstance getNonce(); + + abstract DataFlowNode getInputData(); + } + + abstract class CipherAlgorithmInstance extends LocatableElement { } + + abstract class KeyEncapsulationOperationInstance extends LocatableElement { } + + abstract class KeyEncapsulationAlgorithmInstance extends LocatableElement { } + + abstract class EllipticCurveAlgorithmInstance extends LocatableElement { } + + // Non-standalone algorithms + abstract class BlockCipherModeOfOperationAlgorithmInstance extends LocatableElement { } + + abstract class PaddingAlgorithmInstance extends LocatableElement { } + + // Artifacts + abstract private class ArtifactLocatableElement extends LocatableElement { + abstract DataFlowNode asOutputData(); + + abstract DataFlowNode getInput(); + } + + abstract class DigestArtifactInstance extends ArtifactLocatableElement { } + + abstract class KeyMaterialInstance extends ArtifactLocatableElement { } + + abstract class KeyArtifactInstance extends ArtifactLocatableElement { } + + abstract class NonceArtifactInstance extends ArtifactLocatableElement { } + + abstract class RandomNumberGenerationInstance extends ArtifactLocatableElement { + final override DataFlowNode getInput() { none() } + } + + newtype TNode = + // Artifacts (data that is not an operation or algorithm, e.g., a key) + TDigest(DigestArtifactInstance e) or + TKey(KeyArtifactInstance e) or + TKeyMaterial(KeyMaterialInstance e) or + TNonce(NonceArtifactInstance e) or + TRandomNumberGeneration(RandomNumberGenerationInstance e) or + // Operations (e.g., hashing, encryption) + THashOperation(HashOperationInstance e) or + TKeyDerivationOperation(KeyDerivationOperationInstance e) or + TCipherOperation(CipherOperationInstance e) or + TKeyEncapsulationOperation(KeyEncapsulationOperationInstance e) or + // Algorithms (e.g., SHA-256, AES) + TCipherAlgorithm(CipherAlgorithmInstance e) or + TEllipticCurveAlgorithm(EllipticCurveAlgorithmInstance e) or + THashAlgorithm(HashAlgorithmInstance e) or + TKeyDerivationAlgorithm(KeyDerivationAlgorithmInstance e) or + TKeyEncapsulationAlgorithm(KeyEncapsulationAlgorithmInstance e) or + // Non-standalone Algorithms (e.g., Mode, Padding) + // TODO: need to rename this, as "mode" is getting reused in different contexts, be precise + TBlockCipherModeOfOperationAlgorithm(BlockCipherModeOfOperationAlgorithmInstance e) or + TPaddingAlgorithm(PaddingAlgorithmInstance e) or + // Composite and hybrid cryptosystems (e.g., RSA-OAEP used with AES, post-quantum hybrid cryptosystems) + // These nodes are always parent nodes and are not modeled but rather defined via library-agnostic patterns. + TKemDemHybridCryptosystem(CipherAlgorithm dem) or // TODO, change this relation and the below ones + TKeyAgreementHybridCryptosystem(CipherAlgorithmInstance ka) or + TAsymmetricEncryptionMacHybridCryptosystem(CipherAlgorithmInstance enc) or + TPostQuantumHybridCryptosystem(CipherAlgorithmInstance enc) + + /** + * The base class for all cryptographic assets, such as operations and algorithms. + * + * Each `NodeBase` is a node in a graph of cryptographic operations, where the edges are the relationships between the nodes. + */ + abstract class NodeBase extends TNode { + /** + * Returns a string representation of this node. + */ + string toString() { result = this.getInternalType() } + + /** + * Returns a string representation of the internal type of this node, usually the name of the class. + */ + abstract string getInternalType(); + + /** + * Returns the location of this node in the code. + */ + abstract Location getLocation(); + + /** + * Gets the origin of this node, e.g., a string literal in source describing it. + */ + LocatableElement getOrigin(string value) { none() } + + /** + * Returns the child of this node with the given edge name. + * + * This predicate is overriden by derived classes to construct the graph of cryptographic operations. + */ + NodeBase getChild(string edgeName) { none() } + + /** + * Defines properties of this node by name and either a value or location or both. + * + * This predicate is overriden by derived classes to construct the graph of cryptographic operations. + */ + predicate properties(string key, string value, Location location) { + key = "origin" and + location = this.getOrigin(value).getLocation() and + not location = this.getLocation() + } + + /** + * Returns a parent of this node. + */ + final NodeBase getAParent() { result.getChild(_) = this } + } + + class Asset = NodeBase; + + abstract class Artifact extends NodeBase { + abstract DataFlowNode asOutputData(); + + abstract DataFlowNode getInputData(); + } + + /** + * A nonce or initialization vector + */ + private class NonceImpl extends Artifact, TNonce { + NonceArtifactInstance instance; + + NonceImpl() { this = TNonce(instance) } + + final override string getInternalType() { result = "Nonce" } + + override Location getLocation() { result = instance.getLocation() } + + override DataFlowNode asOutputData() { result = instance.asOutputData() } + + override DataFlowNode getInputData() { result = instance.getInput() } + } + + final class Nonce = NonceImpl; + + /** + * A source of random number generation + */ + final private class RandomNumberGenerationImpl extends Artifact, TRandomNumberGeneration { + RandomNumberGenerationInstance instance; + + RandomNumberGenerationImpl() { this = TRandomNumberGeneration(instance) } + + final override string getInternalType() { result = "RandomNumberGeneration" } + + override Location getLocation() { result = instance.getLocation() } + + override DataFlowNode asOutputData() { result = instance.asOutputData() } + + override DataFlowNode getInputData() { result = instance.getInput() } + } + + final class RandomNumberGeneration = RandomNumberGenerationImpl; + + /** + * A cryptographic operation, such as hashing or encryption. + */ + abstract class Operation extends Asset { + /** + * Gets the algorithm associated with this operation. + */ + abstract Algorithm getAlgorithm(); + + override NodeBase getChild(string edgeName) { + result = super.getChild(edgeName) + or + edgeName = "uses" and + if exists(this.getAlgorithm()) then result = this.getAlgorithm() else result = this + } + } + + abstract class Algorithm extends Asset { + final override string getInternalType() { result = this.getAlgorithmType() } + + /** + * Gets the name of this algorithm, e.g., "AES" or "SHA". + */ + abstract string getAlgorithmName(); + + /** + * Gets the raw name of this algorithm from source (no parsing or formatting) + */ + abstract string getRawAlgorithmName(); + + /** + * Gets the type of this algorithm, e.g., "hash" or "key derivation". + */ + abstract string getAlgorithmType(); + + override predicate properties(string key, string value, Location location) { + super.properties(key, value, location) + or + // [ONLY_KNOWN] + key = "name" and value = this.getAlgorithmName() and location = this.getLocation() + or + // [ONLY_KNOWN] + key = "raw_name" and value = this.getRawAlgorithmName() and location = this.getLocation() + } + } + + /** + * A hashing operation that processes data to generate a hash value. + * + * This operation takes an input message of arbitrary content and length and produces a fixed-size + * hash value as the output using a specified hashing algorithm. + */ + abstract class HashOperation extends Operation, THashOperation { + abstract override HashAlgorithm getAlgorithm(); + //override string getOperationType() { result = "HashOperation" } + } + + newtype THashType = + MD2() or + MD4() or + MD5() or + SHA1() or + SHA2() or + SHA3() or + RIPEMD160() or + WHIRLPOOL() or + OtherHashType() + + /** + * A hashing algorithm that transforms variable-length input into a fixed-size hash value. + */ + abstract class HashAlgorithm extends Algorithm, THashAlgorithm { + override string getAlgorithmType() { result = "HashAlgorithm" } + + final predicate hashTypeToNameMapping(THashType type, string name) { + type instanceof MD2 and name = "MD2" + or + type instanceof MD4 and name = "MD4" + or + type instanceof MD5 and name = "MD5" + or + type instanceof SHA1 and name = "SHA1" + or + type instanceof SHA2 and name = "SHA2" + or + type instanceof SHA3 and name = "SHA3" + or + type instanceof RIPEMD160 and name = "RIPEMD160" + or + type instanceof WHIRLPOOL and name = "WHIRLPOOL" + or + type instanceof OtherHashType and name = this.getRawAlgorithmName() + } + + /** + * Gets the type of this hashing algorithm, e.g., MD5 or SHA. + * + * When modeling a new hashing algorithm, use this predicate to specify the type of the algorithm. + */ + abstract THashType getHashType(); + + override string getAlgorithmName() { this.hashTypeToNameMapping(this.getHashType(), result) } + + /** + * Gets the digest size of SHA2 or SHA3 algorithms. + * + * This predicate does not need to hold for other algorithms, + * as the digest size is already known based on the algorithm itself. + * + * For `OtherHashType` algorithms where a digest size should be reported, `THashType` + * should be extended to explicitly model that algorithm. If the algorithm has variable + * or multiple digest size variants, a similar predicate to this one must be defined + * for that algorithm to report the digest size. + */ + abstract string getSHA2OrSHA3DigestSize(Location location); + + bindingset[type] + private string getTypeDigestSizeFixed(THashType type) { + type instanceof MD2 and result = "128" + or + type instanceof MD4 and result = "128" + or + type instanceof MD5 and result = "128" + or + type instanceof SHA1 and result = "160" + or + type instanceof RIPEMD160 and result = "160" + or + type instanceof WHIRLPOOL and result = "512" + } + + bindingset[type] + private string getTypeDigestSize(THashType type, Location location) { + result = this.getTypeDigestSizeFixed(type) and location = this.getLocation() + or + type instanceof SHA2 and result = this.getSHA2OrSHA3DigestSize(location) + or + type instanceof SHA3 and result = this.getSHA2OrSHA3DigestSize(location) + } + + string getDigestSize(Location location) { + result = this.getTypeDigestSize(this.getHashType(), location) + } + + final override predicate properties(string key, string value, Location location) { + super.properties(key, value, location) + or + // [KNOWN_OR_UNKNOWN] + key = "digest_size" and + if exists(this.getDigestSize(location)) + then value = this.getDigestSize(location) + else ( + value instanceof UnknownPropertyValue and location instanceof UnknownLocation + ) + } + } + + /** + * An operation that derives one or more keys from an input value. + */ + abstract class KeyDerivationOperation extends Operation, TKeyDerivationOperation { + final override Location getLocation() { + exists(LocatableElement le | this = TKeyDerivationOperation(le) and result = le.getLocation()) + } + //override string getOperationType() { result = "KeyDerivationOperation" } + } + + /** + * An algorithm that derives one or more keys from an input value. + * + * Only use this class to model UNKNOWN key derivation algorithms. + * + * For known algorithms, use the specialized classes, e.g., `HKDF` and `PKCS12KDF`. + */ + abstract class KeyDerivationAlgorithm extends Algorithm, TKeyDerivationAlgorithm { + final override Location getLocation() { + exists(LocatableElement le | this = TKeyDerivationAlgorithm(le) and result = le.getLocation()) + } + + override string getAlgorithmType() { result = "KeyDerivationAlgorithm" } + + override string getAlgorithmName() { result = this.getRawAlgorithmName() } + } + + /** + * An algorithm that derives one or more keys from an input value, using a configurable digest algorithm. + */ + abstract private class KeyDerivationWithDigestParameter extends KeyDerivationAlgorithm { + abstract HashAlgorithm getHashAlgorithm(); + + override NodeBase getChild(string edgeName) { + result = super.getChild(edgeName) + or + ( + // [KNOWN_OR_UNKNOWN] + edgeName = "uses" and + if exists(this.getHashAlgorithm()) then result = this.getHashAlgorithm() else result = this + ) + } + } + + /** + * HKDF key derivation function + */ + abstract class HKDF extends KeyDerivationWithDigestParameter { + final override string getAlgorithmName() { result = "HKDF" } + } + + /** + * PBKDF2 key derivation function + */ + abstract class PBKDF2 extends KeyDerivationWithDigestParameter { + final override string getAlgorithmName() { result = "PBKDF2" } + + /** + * Gets the iteration count of this key derivation algorithm. + */ + abstract string getIterationCount(Location location); + + /** + * Gets the bit-length of the derived key. + */ + abstract string getKeyLength(Location location); + + override predicate properties(string key, string value, Location location) { + super.properties(key, value, location) + or + ( + // [KNOWN_OR_UNKNOWN] + key = "iterations" and + if exists(this.getIterationCount(location)) + then value = this.getIterationCount(location) + else ( + value instanceof UnknownPropertyValue and location instanceof UnknownLocation + ) + ) + or + ( + // [KNOWN_OR_UNKNOWN] + key = "key_len" and + if exists(this.getKeyLength(location)) + then value = this.getKeyLength(location) + else ( + value instanceof UnknownPropertyValue and location instanceof UnknownLocation + ) + ) + } + } + + /** + * PKCS12KDF key derivation function + */ + abstract class PKCS12KDF extends KeyDerivationWithDigestParameter { + override string getAlgorithmName() { result = "PKCS12KDF" } + + /** + * Gets the iteration count of this key derivation algorithm. + */ + abstract string getIterationCount(Location location); + + /** + * Gets the raw ID argument specifying the intended use of the derived key. + * + * The intended use is defined in RFC 7292, appendix B.3, as follows: + * + * This standard specifies 3 different values for the ID byte mentioned above: + * + * 1. If ID=1, then the pseudorandom bits being produced are to be used + * as key material for performing encryption or decryption. + * + * 2. If ID=2, then the pseudorandom bits being produced are to be used + * as an IV (Initial Value) for encryption or decryption. + * + * 3. If ID=3, then the pseudorandom bits being produced are to be used + * as an integrity key for MACing. + */ + abstract string getIDByte(Location location); + + override predicate properties(string key, string value, Location location) { + super.properties(key, value, location) + or + ( + // [KNOWN_OR_UNKNOWN] + key = "iterations" and + if exists(this.getIterationCount(location)) + then value = this.getIterationCount(location) + else ( + value instanceof UnknownPropertyValue and location instanceof UnknownLocation + ) + ) + or + ( + // [KNOWN_OR_UNKNOWN] + key = "id_byte" and + if exists(this.getIDByte(location)) + then value = this.getIDByte(location) + else ( + value instanceof UnknownPropertyValue and location instanceof UnknownLocation + ) + ) + } + } + + /** + * scrypt key derivation function + */ + abstract class SCRYPT extends KeyDerivationAlgorithm { + final override string getAlgorithmName() { result = "scrypt" } + + /** + * Gets the iteration count (`N`) argument + */ + abstract string get_N(Location location); + + /** + * Gets the block size (`r`) argument + */ + abstract string get_r(Location location); + + /** + * Gets the parallelization factor (`p`) argument + */ + abstract string get_p(Location location); + + /** + * Gets the derived key length argument + */ + abstract string getDerivedKeyLength(Location location); + + override predicate properties(string key, string value, Location location) { + super.properties(key, value, location) + or + ( + // [KNOWN_OR_UNKNOWN] + key = "N" and + if exists(this.get_N(location)) + then value = this.get_N(location) + else ( + value instanceof UnknownPropertyValue and location instanceof UnknownLocation + ) + ) + or + ( + // [KNOWN_OR_UNKNOWN] + key = "r" and + if exists(this.get_r(location)) + then value = this.get_r(location) + else ( + value instanceof UnknownPropertyValue and location instanceof UnknownLocation + ) + ) + or + ( + // [KNOWN_OR_UNKNOWN] + key = "p" and + if exists(this.get_p(location)) + then value = this.get_p(location) + else ( + value instanceof UnknownPropertyValue and location instanceof UnknownLocation + ) + ) + or + ( + // [KNOWN_OR_UNKNOWN] + key = "key_len" and + if exists(this.getDerivedKeyLength(location)) + then value = this.getDerivedKeyLength(location) + else ( + value instanceof UnknownPropertyValue and location instanceof UnknownLocation + ) + ) + } + } + + /* + * TODO: + * + * Rule: No newtype representing a type of algorithm should be modelled with multiple interfaces + * + * Example 1: HKDF and PKCS12KDF are both key derivation algorithms. + * However, PKCS12KDF also has a property: the iteration count. + * + * If we have HKDF and PKCS12KDF under TKeyDerivationType, + * someone modelling a library might try to make a generic identification of both of those algorithms. + * + * They will therefore not use the specialized type for PKCS12KDF, + * meaning "from PKCS12KDF algo select algo" will have no results. + * + * Example 2: Each type below represents a common family of elliptic curves, with a shared interface, i.e., + * predicates for library modellers to implement as well as the properties and edges reported. + */ + + /** + * Elliptic curve algorithms + */ + newtype TEllipticCurveType = + NIST() or + SEC() or + NUMS() or + PRIME() or + BRAINPOOL() or + CURVE25519() or + CURVE448() or + C2() or + SM2() or + ES() or + OtherEllipticCurveType() + + abstract class EllipticCurve extends Algorithm, TEllipticCurveAlgorithm { + abstract string getKeySize(Location location); + + abstract TEllipticCurveType getCurveFamily(); + + override predicate properties(string key, string value, Location location) { + super.properties(key, value, location) + or + // [KNOWN_OR_UNKNOWN] + key = "key_size" and + if exists(this.getKeySize(location)) + then value = this.getKeySize(location) + else ( + value instanceof UnknownPropertyValue and location instanceof UnknownLocation + ) + // other properties, like field type are possible, but not modeled until considered necessary + } + + override string getAlgorithmName() { result = this.getRawAlgorithmName().toUpperCase() } + + /** + * Mandating that for Elliptic Curves specifically, users are responsible + * for providing as the 'raw' name, the official name of the algorithm. + * + * Casing doesn't matter, we will enforce further naming restrictions on + * `getAlgorithmName` by default. + * + * Rationale: elliptic curve names can have a lot of variation in their components + * (e.g., "secp256r1" vs "P-256"), trying to produce generalized set of properties + * is possible to capture all cases, but such modeling is likely not necessary. + * if all properties need to be captured, we can reassess how names are generated. + */ + abstract override string getRawAlgorithmName(); + } + + newtype TCipherOperationSubtype = + TEncryptionMode() or + TDecryptionMode() or + TWrapMode() or + TUnwrapMode() or + TUnknownCipherOperationMode() + + abstract class CipherOperationSubtype extends TCipherOperationSubtype { + abstract string toString(); + } + + class EncryptionMode extends CipherOperationSubtype, TEncryptionMode { + override string toString() { result = "Encrypt" } + } + + class DecryptionMode extends CipherOperationSubtype, TDecryptionMode { + override string toString() { result = "Decrypt" } + } + + class WrapMode extends CipherOperationSubtype, TWrapMode { + override string toString() { result = "Wrap" } + } + + class UnwrapMode extends CipherOperationSubtype, TUnwrapMode { + override string toString() { result = "Unwrap" } + } + + class UnknownCipherOperationMode extends CipherOperationSubtype, TUnknownCipherOperationMode { + override string toString() { result = "Unknown" } + } + + /** + * An encryption operation that processes plaintext to generate a ciphertext. + * This operation takes an input message (plaintext) of arbitrary content and length + * and produces a ciphertext as the output using a specified encryption algorithm (with a mode and padding). + */ + class CipherOperationImpl extends Operation, TCipherOperation { + CipherOperationInstance instance; + + CipherOperationImpl() { this = TCipherOperation(instance) } + + override string getInternalType() { result = "CipherOperation" } + + override Location getLocation() { result = instance.getLocation() } + + CipherOperationSubtype getCipherOperationMode() { + result = instance.getCipherOperationSubtype() + } + + final override CipherAlgorithm getAlgorithm() { result.getInstance() = instance.getAlgorithm() } + + override NodeBase getChild(string key) { + result = super.getChild(key) + or + // [KNOWN_OR_UNKNOWN] + key = "nonce" and + if exists(this.getNonce()) then result = this.getNonce() else result = this + } + + override predicate properties(string key, string value, Location location) { + super.properties(key, value, location) + or + // [ALWAYS_KNOWN] - Unknown is handled in getCipherOperationMode() + key = "operation" and + value = this.getCipherOperationMode().toString() and + location = this.getLocation() + } + + /** + * Gets the initialization vector associated with this encryption operation. + * + * This predicate does not need to hold for all encryption operations, + * as the initialization vector is not always required. + */ + Nonce getNonce() { result = TNonce(instance.getNonce()) } + + DataFlowNode getInputData() { result = instance.getInputData() } + } + + final class CipherOperation = CipherOperationImpl; + + /** + * Block cipher modes of operation algorithms + */ + newtype TBlockCipherModeOperationType = + ECB() or // Not secure, widely used + CBC() or // Vulnerable to padding oracle attacks + GCM() or // Widely used AEAD mode (TLS 1.3, SSH, IPsec) + CTR() or // Fast stream-like encryption (SSH, disk encryption) + XTS() or // Standard for full-disk encryption (BitLocker, LUKS, FileVault) + CCM() or // Used in lightweight cryptography (IoT, WPA2) + SIV() or // Misuse-resistant encryption, used in secure storage + OCB() or // Efficient AEAD mode + OtherMode() + + abstract class ModeOfOperationAlgorithm extends Algorithm, TBlockCipherModeOfOperationAlgorithm { + override string getAlgorithmType() { result = "ModeOfOperation" } + + /** + * Gets the type of this mode of operation, e.g., "ECB" or "CBC". + * + * When modeling a new mode of operation, use this predicate to specify the type of the mode. + * + * If a type cannot be determined, the result is `OtherMode`. + */ + abstract TBlockCipherModeOperationType getModeType(); + + bindingset[type] + final private predicate modeToNameMapping(TBlockCipherModeOperationType type, string name) { + type instanceof ECB and name = "ECB" + or + type instanceof CBC and name = "CBC" + or + type instanceof GCM and name = "GCM" + or + type instanceof CTR and name = "CTR" + or + type instanceof XTS and name = "XTS" + or + type instanceof CCM and name = "CCM" + or + type instanceof SIV and name = "SIV" + or + type instanceof OCB and name = "OCB" + or + type instanceof OtherMode and name = this.getRawAlgorithmName() + } + + override string getAlgorithmName() { this.modeToNameMapping(this.getModeType(), result) } + } + + newtype TPaddingType = + PKCS1_v1_5() or // RSA encryption/signing padding + PKCS7() or // Standard block cipher padding (PKCS5 for 8-byte blocks) + ANSI_X9_23() or // Zero-padding except last byte = padding length + NoPadding() or // Explicit no-padding + OAEP() or // RSA OAEP padding + OtherPadding() + + abstract class PaddingAlgorithm extends Algorithm, TPaddingAlgorithm { + override string getAlgorithmType() { result = "PaddingAlgorithm" } + + /** + * Gets the type of this padding algorithm, e.g., "PKCS7" or "OAEP". + * + * When modeling a new padding algorithm, use this predicate to specify the type of the padding. + * + * If a type cannot be determined, the result is `OtherPadding`. + */ + abstract TPaddingType getPaddingType(); + + bindingset[type] + final private predicate paddingToNameMapping(TPaddingType type, string name) { + type instanceof PKCS1_v1_5 and name = "PKCS1_v1_5" + or + type instanceof PKCS7 and name = "PKCS7" + or + type instanceof ANSI_X9_23 and name = "ANSI_X9_23" + or + type instanceof NoPadding and name = "NoPadding" + or + type instanceof OAEP and name = "OAEP" + or + type instanceof OtherPadding and name = this.getRawAlgorithmName() + } + + override string getAlgorithmName() { this.paddingToNameMapping(this.getPaddingType(), result) } + } + + /** + * A helper type for distinguishing between block and stream ciphers. + */ + newtype TCipherStructureType = + Block() or + Stream() or + Asymmetric() or + UnknownCipherStructureType() + + private string getCipherStructureTypeString(TCipherStructureType type) { + type instanceof Block and result = "Block" + or + type instanceof Stream and result = "Stream" + or + type instanceof Asymmetric and result = "Asymmetric" + or + type instanceof UnknownCipherStructureType and result instanceof UnknownPropertyValue + } + + /** + * Symmetric algorithms + */ + newtype TCipherType = + AES() or + Camellia() or + DES() or + TripleDES() or + IDEA() or + CAST5() or + ChaCha20() or + RC4() or + RC5() or + RSA() or + OtherCipherType() + + abstract class CipherAlgorithm extends Algorithm, TCipherAlgorithm { + final LocatableElement getInstance() { this = TCipherAlgorithm(result) } + + final TCipherStructureType getCipherStructure() { + this.cipherFamilyToNameAndStructure(this.getCipherFamily(), _, result) + } + + final override string getAlgorithmName() { + this.cipherFamilyToNameAndStructure(this.getCipherFamily(), result, _) + } + + override string getAlgorithmType() { result = "CipherAlgorithm" } + + /** + * Gets the key size of this cipher, e.g., "128" or "256". + */ + abstract string getKeySize(Location location); + + /** + * Gets the type of this cipher, e.g., "AES" or "ChaCha20". + */ + abstract TCipherType getCipherFamily(); + + /** + * Gets the mode of operation of this cipher, e.g., "GCM" or "CBC". + */ + abstract ModeOfOperationAlgorithm getModeOfOperation(); + + /** + * Gets the padding scheme of this cipher, e.g., "PKCS7" or "NoPadding". + */ + abstract PaddingAlgorithm getPadding(); + + bindingset[type] + final private predicate cipherFamilyToNameAndStructure( + TCipherType type, string name, TCipherStructureType s + ) { + type instanceof AES and name = "AES" and s = Block() + or + type instanceof Camellia and name = "Camellia" and s = Block() + or + type instanceof DES and name = "DES" and s = Block() + or + type instanceof TripleDES and name = "TripleDES" and s = Block() + or + type instanceof IDEA and name = "IDEA" and s = Block() + or + type instanceof CAST5 and name = "CAST5" and s = Block() + or + type instanceof ChaCha20 and name = "ChaCha20" and s = Stream() + or + type instanceof RC4 and name = "RC4" and s = Stream() + or + type instanceof RC5 and name = "RC5" and s = Block() + or + type instanceof RSA and name = "RSA" and s = Asymmetric() + or + type instanceof OtherCipherType and + name = this.getRawAlgorithmName() and + s = UnknownCipherStructureType() + } + + //mode, padding scheme, keysize, block/stream, auth'd + //nodes = mode, padding scheme + //properties = keysize, block/stream, auth'd + //leave authd to lang specific + override NodeBase getChild(string edgeName) { + result = super.getChild(edgeName) + or + // [KNOWN_OR_UNKNOWN] + edgeName = "mode" and + if exists(this.getModeOfOperation()) + then result = this.getModeOfOperation() + else result = this + or + // [KNOWN_OR_UNKNOWN] + edgeName = "padding" and + if exists(this.getPadding()) then result = this.getPadding() else result = this + } + + override predicate properties(string key, string value, Location location) { + super.properties(key, value, location) + or + // [ALWAYS_KNOWN] - unknown case is handled in `getCipherStructureTypeString` + key = "structure" and + getCipherStructureTypeString(this.getCipherStructure()) = value and + location instanceof UnknownLocation + or + ( + // [KNOWN_OR_UNKNOWN] + key = "key_size" and + if exists(this.getKeySize(location)) + then value = this.getKeySize(location) + else ( + value instanceof UnknownPropertyValue and location instanceof UnknownLocation + ) + ) + } + } + + abstract class KEMAlgorithm extends TKeyEncapsulationAlgorithm, Algorithm { + final override string getAlgorithmType() { result = "KeyEncapsulationAlgorithm" } + } + + /** + * A Key Material Object + */ + private class KeyMaterialImpl extends Artifact, TKeyMaterial { + KeyMaterialInstance instance; + + KeyMaterialImpl() { this = TKeyMaterial(instance) } + + final override string getInternalType() { result = "KeyMaterial" } + + override Location getLocation() { result = instance.getLocation() } + + override DataFlowNode asOutputData() { result = instance.asOutputData() } + + override DataFlowNode getInputData() { result = instance.getInput() } + } + + final class KeyMaterial = KeyMaterialImpl; +} diff --git a/shared/cryptography/qlpack.yml b/shared/cryptography/qlpack.yml new file mode 100644 index 000000000000..768c64a0704e --- /dev/null +++ b/shared/cryptography/qlpack.yml @@ -0,0 +1,7 @@ +name: codeql/cryptography +version: 0.0.0-dev +groups: shared +library: true +dependencies: + codeql/util: ${workspace} +warnOnImplicitThis: true \ No newline at end of file