From 00724d39dd09f37248e328ef256b8db158dfeae7 Mon Sep 17 00:00:00 2001 From: Ezekiel Warren Date: Sun, 1 Sep 2024 22:42:45 -0700 Subject: [PATCH 1/9] feat: hashing sources for caching --- src/main.cpp2 | 37 ++++- src/xxh3.cppm | 364 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 398 insertions(+), 3 deletions(-) create mode 100644 src/xxh3.cppm diff --git a/src/main.cpp2 b/src/main.cpp2 index e88e94e..73de00a 100644 --- a/src/main.cpp2 +++ b/src/main.cpp2 @@ -2,6 +2,7 @@ import std.compat; import cpp2b; import dylib; import nlohmann.json; +import xxh3; fs: namespace == std::filesystem; json: type == nlohmann::json; @@ -758,7 +759,22 @@ contains_target: (targets, value: std::string) -> bool = { return false; } +hash_file: (p: fs::path) -> u64 = { + f: std::ifstream = (p, std::ios::binary | std::ios::ate); + size: size_t = unsafe_cast(f.tellg()); + f.seekg(0, std::ios::beg); + buffer: std::vector = (); + buffer.resize(size); + f.read(buffer.data(), size); + if f.fail() { return 0; } + return constexpr_xxh3::XXH3_64bits(buffer.data(), buffer.size()); +} + +cpp2b_data_file: type = { +} + do_build: (targets: std::vector) -> (stuff: full_build_info, exit_code: int) = { + cwd := fs::current_path(); build_cpp2_dir := fs::current_path(); stuff = (); @@ -786,9 +802,9 @@ do_build: (targets: std::vector) -> (stuff: full_build_info, exit_c warn_if_error("remove", p, ec); } - src_loop: for fs::recursive_directory_iterator(fs::current_path(), fs::directory_options::follow_directory_symlink) do(p: fs::path) { + src_loop: for fs::recursive_directory_iterator(cwd, fs::directory_options::follow_directory_symlink) do(p: fs::path) { if p.extension() == ".cpp2" { - rel_path := fs::relative(p, fs::current_path()); + rel_path := fs::relative(p, cwd); for rel_path do(rel_path_comp) { if rel_path_comp.string().starts_with(".") { continue src_loop; @@ -816,15 +832,30 @@ do_build: (targets: std::vector) -> (stuff: full_build_info, exit_c transpile_futures: std::vector> = (); cpp2b_parse_futures: std::vector> = (); + file_hash_futures: std::unordered_map> = (); + file_hash_futures: std::unordered_map> = (); + transpile_futures.reserve(cpp2_source_files.size()); cpp2b_parse_futures.reserve(cpp2_source_files.size()); - + file_hash_futures.reserve(cpp2_source_files.size()); for cpp2_source_files do(src_file: fs::path) { + file_hash_futures.insert(std::make_pair( + src_file, + std::async(std::launch::async, hash_file, src_file) + )); transpile_futures.emplace_back(std::async(std::launch::async, transpile_cpp2, src_file, transpile_source_dir)); cpp2b_parse_futures.emplace_back(std::async(std::launch::async, cpp2b_parse_source, src_file)); } + for file_hash_futures do(inout entry) { + p := entry.first; + hash_fut := entry.second&; + hash := hash_fut*.get(); + + log_info("{} hash is {}", p.generic_string(), hash); + } + for transpile_futures do(inout fut) { info := fut.get(); if info.cppfront_exit_code != 0 { diff --git a/src/xxh3.cppm b/src/xxh3.cppm new file mode 100644 index 0000000..7e4a927 --- /dev/null +++ b/src/xxh3.cppm @@ -0,0 +1,364 @@ +/* +BSD 2-Clause License + +constexpr-xxh3 - C++20 constexpr implementation of the XXH3 64-bit variant of xxHash +Copyright (c) 2021-2023, chys +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* +This file uses code from Yann Collet's xxHash implementation. +Original xxHash copyright notice: + +xxHash - Extremely Fast Hash algorithm +Header File +Copyright (C) 2012-2020 Yann Collet +*/ + +/* +Modified by Ezekiel Warren +- removed consteval and _const suffix +*/ + +module; + +#include +#include +#include // for std::data, std::size +#include +#include + +export module xxh3; + +export namespace constexpr_xxh3 { + +template +concept ByteType = (std::is_integral_v && sizeof(T) == 1) +#if defined __cpp_lib_byte && __cpp_lib_byte >= 201603 + || std::is_same_v +#endif + ; + +template +concept BytePtrType = requires (T ptr) { + requires std::is_pointer_v; + requires ByteType>; +}; + +template +concept BytesType = requires (const T& bytes) { + { std::data(bytes) }; + requires BytePtrType; + // -> std::convertible_to is not supported widely enough + { static_cast(std::size(bytes)) }; +}; + +inline constexpr uint32_t swap32(uint32_t x) noexcept { + return ((x << 24) & 0xff000000) | ((x << 8) & 0x00ff0000) | + ((x >> 8) & 0x0000ff00) | ((x >> 24) & 0x000000ff); +} + +template +inline constexpr uint32_t readLE32(const T* ptr) noexcept { + return uint8_t(ptr[0]) | uint32_t(uint8_t(ptr[1])) << 8 | + uint32_t(uint8_t(ptr[2])) << 16 | uint32_t(uint8_t(ptr[3])) << 24; +} + +inline constexpr uint64_t swap64(uint64_t x) noexcept { + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} + +template +inline constexpr uint64_t readLE64(const T* ptr) noexcept { + return readLE32(ptr) | uint64_t(readLE32(ptr + 4)) << 32; +} + +inline constexpr void writeLE64(uint8_t* dst, uint64_t v) noexcept { + for (int i = 0; i < 8; ++i) dst[i] = uint8_t(v >> (i * 8)); +} + +inline constexpr uint32_t PRIME32_1 = 0x9E3779B1U; +inline constexpr uint32_t PRIME32_2 = 0x85EBCA77U; +inline constexpr uint32_t PRIME32_3 = 0xC2B2AE3DU; + +inline constexpr uint64_t PRIME64_1 = 0x9E3779B185EBCA87ULL; +inline constexpr uint64_t PRIME64_2 = 0xC2B2AE3D27D4EB4FULL; +inline constexpr uint64_t PRIME64_3 = 0x165667B19E3779F9ULL; +inline constexpr uint64_t PRIME64_4 = 0x85EBCA77C2B2AE63ULL; +inline constexpr uint64_t PRIME64_5 = 0x27D4EB2F165667C5ULL; + +inline constexpr size_t SECRET_DEFAULT_SIZE = 192; +inline constexpr size_t SECRET_SIZE_MIN = 136; + +inline constexpr uint8_t kSecret[SECRET_DEFAULT_SIZE]{ + 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, + 0xf7, 0x21, 0xad, 0x1c, 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, + 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, 0xcb, 0x79, 0xe6, 0x4e, + 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, + 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, + 0x81, 0x3a, 0x26, 0x4c, 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, + 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, 0x71, 0x64, 0x48, 0x97, + 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, + 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, + 0xc7, 0x0b, 0x4f, 0x1d, 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, + 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, 0xea, 0xc5, 0xac, 0x83, + 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, + 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, + 0x29, 0xd4, 0x68, 0x9e, 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, + 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, 0x45, 0xcb, 0x3a, 0x8f, + 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, +}; + +inline constexpr std::pair mult64to128( + uint64_t lhs, uint64_t rhs) noexcept { + uint64_t lo_lo = uint64_t(uint32_t(lhs)) * uint32_t(rhs); + uint64_t hi_lo = (lhs >> 32) * uint32_t(rhs); + uint64_t lo_hi = uint32_t(lhs) * (rhs >> 32); + uint64_t hi_hi = (lhs >> 32) * (rhs >> 32); + uint64_t cross = (lo_lo >> 32) + uint32_t(hi_lo) + lo_hi; + uint64_t upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; + uint64_t lower = (cross << 32) | uint32_t(lo_lo); + return {lower, upper}; +} + +inline constexpr uint64_t mul128_fold64(uint64_t lhs, uint64_t rhs) noexcept { +#if defined __GNUC__ && __WORDSIZE >= 64 + // It appears both GCC and Clang support evaluating __int128 as constexpr + auto product = static_cast(lhs) * rhs; + return uint64_t(product >> 64) ^ uint64_t(product); +#else + auto product = mult64to128(lhs, rhs); + return product.first ^ product.second; +#endif +} + +inline constexpr uint64_t XXH64_avalanche(uint64_t h) noexcept { + h = (h ^ (h >> 33)) * PRIME64_2; + h = (h ^ (h >> 29)) * PRIME64_3; + return h ^ (h >> 32); +} + +inline constexpr uint64_t XXH3_avalanche(uint64_t h) noexcept { + h = (h ^ (h >> 37)) * 0x165667919E3779F9ULL; + return h ^ (h >> 32); +} + +inline constexpr uint64_t rrmxmx(uint64_t h, uint64_t len) noexcept { + h ^= ((h << 49) | (h >> 15)) ^ ((h << 24) | (h >> 40)); + h *= 0x9FB21C651E98DF25ULL; + h ^= (h >> 35) + len; + h *= 0x9FB21C651E98DF25ULL; + return h ^ (h >> 28); +} + +template +constexpr uint64_t mix16B(const T* input, const S* secret, + uint64_t seed) noexcept { + return mul128_fold64(readLE64(input) ^ (readLE64(secret) + seed), + readLE64(input + 8) ^ (readLE64(secret + 8) - seed)); +} + +inline constexpr size_t STRIPE_LEN = 64; +inline constexpr size_t SECRET_CONSUME_RATE = 8; +inline constexpr size_t ACC_NB = STRIPE_LEN / sizeof(uint64_t); + +template +constexpr void accumulate_512(uint64_t* acc, const T* input, + const S* secret) noexcept { + for (size_t i = 0; i < ACC_NB; i++) { + uint64_t data_val = readLE64(input + 8 * i); + uint64_t data_key = data_val ^ readLE64(secret + i * 8); + acc[i ^ 1] += data_val; + acc[i] += uint32_t(data_key) * (data_key >> 32); + } +} + +template +constexpr uint64_t hashLong_64b_internal(const T* input, size_t len, + const S* secret, + size_t secretSize) noexcept { + uint64_t acc[ACC_NB]{PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, + PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1}; + size_t nbStripesPerBlock = (secretSize - STRIPE_LEN) / SECRET_CONSUME_RATE; + size_t block_len = STRIPE_LEN * nbStripesPerBlock; + size_t nb_blocks = (len - 1) / block_len; + + for (size_t n = 0; n < nb_blocks; n++) { + for (size_t i = 0; i < nbStripesPerBlock; i++) + accumulate_512(acc, input + n * block_len + i * STRIPE_LEN, + secret + i * SECRET_CONSUME_RATE); + for (size_t i = 0; i < ACC_NB; i++) + acc[i] = (acc[i] ^ (acc[i] >> 47) ^ + readLE64(secret + secretSize - STRIPE_LEN + 8 * i)) * + PRIME32_1; + } + + size_t nbStripes = ((len - 1) - (block_len * nb_blocks)) / STRIPE_LEN; + for (size_t i = 0; i < nbStripes; i++) + accumulate_512(acc, input + nb_blocks * block_len + i * STRIPE_LEN, + secret + i * SECRET_CONSUME_RATE); + accumulate_512(acc, input + len - STRIPE_LEN, + secret + secretSize - STRIPE_LEN - 7); + uint64_t result = len * PRIME64_1; + for (size_t i = 0; i < 4; i++) + result += + mul128_fold64(acc[2 * i] ^ readLE64(secret + 11 + 16 * i), + acc[2 * i + 1] ^ readLE64(secret + 11 + 16 * i + 8)); + return XXH3_avalanche(result); +} + +template +constexpr uint64_t XXH3_64bits_internal(const T* input, size_t len, + uint64_t seed, const S* secret, + size_t secretLen, + HashLong f_hashLong) noexcept { + if (len == 0) { + return XXH64_avalanche(seed ^ + (readLE64(secret + 56) ^ readLE64(secret + 64))); + } else if (len < 4) { + uint64_t keyed = ((uint32_t(uint8_t(input[0])) << 16) | + (uint32_t(uint8_t(input[len >> 1])) << 24) | + uint8_t(input[len - 1]) | (uint32_t(len) << 8)) ^ + ((readLE32(secret) ^ readLE32(secret + 4)) + seed); + return XXH64_avalanche(keyed); + } else if (len <= 8) { + uint64_t keyed = + (readLE32(input + len - 4) + (uint64_t(readLE32(input)) << 32)) ^ + ((readLE64(secret + 8) ^ readLE64(secret + 16)) - + (seed ^ (uint64_t(swap32(uint32_t(seed))) << 32))); + return rrmxmx(keyed, len); + } else if (len <= 16) { + uint64_t input_lo = + readLE64(input) ^ + ((readLE64(secret + 24) ^ readLE64(secret + 32)) + seed); + uint64_t input_hi = + readLE64(input + len - 8) ^ + ((readLE64(secret + 40) ^ readLE64(secret + 48)) - seed); + uint64_t acc = + len + swap64(input_lo) + input_hi + mul128_fold64(input_lo, input_hi); + return XXH3_avalanche(acc); + } else if (len <= 128) { + uint64_t acc = len * PRIME64_1; + size_t secret_off = 0; + for (size_t i = 0, j = len; j > i; i += 16, j -= 16) { + acc += mix16B(input + i, secret + secret_off, seed); + acc += mix16B(input + j - 16, secret + secret_off + 16, seed); + secret_off += 32; + } + return XXH3_avalanche(acc); + } else if (len <= 240) { + uint64_t acc = len * PRIME64_1; + for (size_t i = 0; i < 128; i += 16) + acc += mix16B(input + i, secret + i, seed); + acc = XXH3_avalanche(acc); + for (size_t i = 128; i < len / 16 * 16; i += 16) + acc += mix16B(input + i, secret + (i - 128) + 3, seed); + acc += mix16B(input + len - 16, secret + SECRET_SIZE_MIN - 17, seed); + return XXH3_avalanche(acc); + } else { + return f_hashLong(input, len, seed, secret, secretLen); + } +} + +template +constexpr size_t bytes_size(const Bytes& bytes) noexcept { + return std::size(bytes); +} + +template +constexpr size_t bytes_size(T (&)[N]) noexcept { + return (N ? N - 1 : 0); +} + +/// Basic interfaces + +template +constexpr uint64_t XXH3_64bits(const T* input, size_t len) noexcept { + return XXH3_64bits_internal( + input, len, 0, kSecret, sizeof(kSecret), + [](const T* input, size_t len, uint64_t, const void*, + size_t) constexpr noexcept { + return hashLong_64b_internal(input, len, kSecret, sizeof(kSecret)); + }); +} + +template +constexpr uint64_t XXH3_64bits_withSecret(const T* input, size_t len, + const S* secret, + size_t secretSize) noexcept { + return XXH3_64bits_internal( + input, len, 0, secret, secretSize, + [](const T* input, size_t len, uint64_t, const S* secret, + size_t secretLen) constexpr noexcept { + return hashLong_64b_internal(input, len, secret, secretLen); + }); +} + +template +constexpr uint64_t XXH3_64bits_withSeed(const T* input, size_t len, + uint64_t seed) noexcept { + if (seed == 0) return XXH3_64bits(input, len); + return XXH3_64bits_internal( + input, len, seed, kSecret, sizeof(kSecret), + [](const T* input, size_t len, uint64_t seed, const void*, + size_t) constexpr noexcept { + uint8_t secret[SECRET_DEFAULT_SIZE]; + for (size_t i = 0; i < SECRET_DEFAULT_SIZE; i += 16) { + writeLE64(secret + i, readLE64(kSecret + i) + seed); + writeLE64(secret + i + 8, readLE64(kSecret + i + 8) - seed); + } + return hashLong_64b_internal(input, len, secret, sizeof(secret)); + }); +} + +/// Convenient interfaces + +template +constexpr uint64_t XXH3_64bits(const Bytes& input) noexcept { + return XXH3_64bits(std::data(input), bytes_size(input)); +} + +template +constexpr uint64_t XXH3_64bits_withSecret(const Bytes& input, + const Secret& secret) noexcept { + return XXH3_64bits_withSecret(std::data(input), bytes_size(input), + std::data(secret), bytes_size(secret)); +} + +template +constexpr uint64_t XXH3_64bits_withSeed(const Bytes& input, + uint64_t seed) noexcept { + return XXH3_64bits_withSeed(std::data(input), bytes_size(input), seed); +} + +} // namespace constexpr_xxh3 + + From d12107485063e4fbab03a69f8d199ad7a0ffacdb Mon Sep 17 00:00:00 2001 From: Ezekiel Warren Date: Sun, 1 Sep 2024 23:40:09 -0700 Subject: [PATCH 2/9] chore: just export whats needed in xxh3 --- src/main.cpp2 | 6 +----- src/xxh3.cppm | 17 ++++++----------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/src/main.cpp2 b/src/main.cpp2 index 73de00a..5f75cb4 100644 --- a/src/main.cpp2 +++ b/src/main.cpp2 @@ -767,10 +767,7 @@ hash_file: (p: fs::path) -> u64 = { buffer.resize(size); f.read(buffer.data(), size); if f.fail() { return 0; } - return constexpr_xxh3::XXH3_64bits(buffer.data(), buffer.size()); -} - -cpp2b_data_file: type = { + return XXH3_64bits(buffer.data(), buffer.size()); } do_build: (targets: std::vector) -> (stuff: full_build_info, exit_code: int) = { @@ -833,7 +830,6 @@ do_build: (targets: std::vector) -> (stuff: full_build_info, exit_c transpile_futures: std::vector> = (); cpp2b_parse_futures: std::vector> = (); file_hash_futures: std::unordered_map> = (); - file_hash_futures: std::unordered_map> = (); transpile_futures.reserve(cpp2_source_files.size()); cpp2b_parse_futures.reserve(cpp2_source_files.size()); diff --git a/src/xxh3.cppm b/src/xxh3.cppm index 7e4a927..ae82e94 100644 --- a/src/xxh3.cppm +++ b/src/xxh3.cppm @@ -51,8 +51,6 @@ module; export module xxh3; -export namespace constexpr_xxh3 { - template concept ByteType = (std::is_integral_v && sizeof(T) == 1) #if defined __cpp_lib_byte && __cpp_lib_byte >= 201603 @@ -300,7 +298,7 @@ constexpr size_t bytes_size(T (&)[N]) noexcept { /// Basic interfaces -template +export template constexpr uint64_t XXH3_64bits(const T* input, size_t len) noexcept { return XXH3_64bits_internal( input, len, 0, kSecret, sizeof(kSecret), @@ -310,7 +308,7 @@ constexpr uint64_t XXH3_64bits(const T* input, size_t len) noexcept { }); } -template +export template constexpr uint64_t XXH3_64bits_withSecret(const T* input, size_t len, const S* secret, size_t secretSize) noexcept { @@ -322,7 +320,7 @@ constexpr uint64_t XXH3_64bits_withSecret(const T* input, size_t len, }); } -template +export template constexpr uint64_t XXH3_64bits_withSeed(const T* input, size_t len, uint64_t seed) noexcept { if (seed == 0) return XXH3_64bits(input, len); @@ -341,24 +339,21 @@ constexpr uint64_t XXH3_64bits_withSeed(const T* input, size_t len, /// Convenient interfaces -template +export template constexpr uint64_t XXH3_64bits(const Bytes& input) noexcept { return XXH3_64bits(std::data(input), bytes_size(input)); } -template +export template constexpr uint64_t XXH3_64bits_withSecret(const Bytes& input, const Secret& secret) noexcept { return XXH3_64bits_withSecret(std::data(input), bytes_size(input), std::data(secret), bytes_size(secret)); } -template +export template constexpr uint64_t XXH3_64bits_withSeed(const Bytes& input, uint64_t seed) noexcept { return XXH3_64bits_withSeed(std::data(input), bytes_size(input), seed); } -} // namespace constexpr_xxh3 - - From dcfb873bad29c1dd7028a91bdcf8802a4aa5f271 Mon Sep 17 00:00:00 2001 From: Ezekiel Warren Date: Mon, 2 Sep 2024 00:12:06 -0700 Subject: [PATCH 3/9] feat: storing source file hashes --- src/main.cpp2 | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/src/main.cpp2 b/src/main.cpp2 index 5f75cb4..89cc86a 100644 --- a/src/main.cpp2 +++ b/src/main.cpp2 @@ -830,10 +830,14 @@ do_build: (targets: std::vector) -> (stuff: full_build_info, exit_c transpile_futures: std::vector> = (); cpp2b_parse_futures: std::vector> = (); file_hash_futures: std::unordered_map> = (); + file_hashes: std::unordered_map = (); + prev_file_hashes: std::unordered_map = (); transpile_futures.reserve(cpp2_source_files.size()); cpp2b_parse_futures.reserve(cpp2_source_files.size()); file_hash_futures.reserve(cpp2_source_files.size()); + file_hashes.reserve(cpp2_source_files.size()); + prev_file_hashes.reserve(cpp2_source_files.size()); for cpp2_source_files do(src_file: fs::path) { file_hash_futures.insert(std::make_pair( @@ -848,8 +852,53 @@ do_build: (targets: std::vector) -> (stuff: full_build_info, exit_c p := entry.first; hash_fut := entry.second&; hash := hash_fut*.get(); + file_hashes.insert(std::make_pair(p, hash)); + } + + data_file: std::fstream = (".cache/cpp2/.data", std::ios::binary | std::ios::in); + + while data_file { + path_length: u16 = 0; + data_file >> path_length; + if !data_file { break; } + + p: std::string = ""; + p.resize(path_length); + data_file.read(p.data(), path_length); + if !data_file { break; } + + path_hash: u64 = 0; + data_file.read(reinterpret_cast<*char>(path_hash&), 8); + if !data_file { break; } + + prev_file_hashes[fs::path(p)] = path_hash; + } + + for file_hashes do(inout entry) { + p := entry.first; + hash := entry.second; + + if prev_file_hashes.contains(p) { + if prev_file_hashes.at(p) == hash { + log_info("{} no change", p.generic_string()); + } else { + log_info("{} changed", p.generic_string()); + } + } else { + log_info("new file {}", p.generic_string()); + } + } + + data_file.close(); + data_file.open(".cache/cpp2/.data", std::ios::binary | std::ios::out | std::ios::trunc); + + for file_hashes do(inout entry) { + p := entry.first.generic_string(); + hash := entry.second; - log_info("{} hash is {}", p.generic_string(), hash); + data_file << unsafe_cast(p.size()); + data_file.write(p.data(), p.size()); + data_file.write(reinterpret_cast<*char>(hash&), 8); } for transpile_futures do(inout fut) { From d9584dc6e81e3284528a9de26ac342a72dfc0192 Mon Sep 17 00:00:00 2001 From: Ezekiel Warren Date: Mon, 2 Sep 2024 09:03:04 -0700 Subject: [PATCH 4/9] chore: add xxh3 to build.cmd --- build.cmd | 14 ++++++++++++++ src/xxh3.cppm | 8 +++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/build.cmd b/build.cmd index d086bb4..889b26a 100644 --- a/build.cmd +++ b/build.cmd @@ -105,6 +105,19 @@ if %ERRORLEVEL% neq 0 ( exit %ERRORLEVEL% ) +echo INFO: compiling xxh3 module... +pushd %modules_dir% +cl /nologo ^ + /std:c++latest /W4 /MDd /EHsc ^ + /reference "%modules_dir%\std.ifc" ^ + /reference "%modules_dir%\std.compat.ifc" ^ + /c /interface /TP "%root_dir%src\xxh3.cppm" > NUL + +if %ERRORLEVEL% neq 0 ( + echo ERROR: failed to compile xxh3 module + exit %ERRORLEVEL% +) + echo INFO: compiling nlohmann.json module... pushd %modules_dir% cl /nologo ^ @@ -140,6 +153,7 @@ cl /nologo "%root_dir%.cache/cpp2/source/src/main.cpp" ^ /reference "%modules_dir%\std.compat.ifc" "%modules_dir%\std.compat.obj" ^ /reference "%modules_dir%\dylib.ifc" "%modules_dir%\dylib.obj" ^ /reference "%modules_dir%\nlohmann.json.ifc" "%modules_dir%\nlohmann.json.obj" ^ + /reference "%modules_dir%\xxh3.ifc" "%modules_dir%\xxh3.obj" ^ /reference "%modules_dir%\cpp2b.ifc" "%modules_dir%\cpp2b.obj" ^ /std:c++latest /W4 /MDd /EHsc ^ /DEBUG:FULL /Zi /FC ^ diff --git a/src/xxh3.cppm b/src/xxh3.cppm index ae82e94..4cb1a73 100644 --- a/src/xxh3.cppm +++ b/src/xxh3.cppm @@ -39,15 +39,13 @@ Copyright (C) 2012-2020 Yann Collet /* Modified by Ezekiel Warren - removed consteval and _const suffix +- using c++20 imports */ module; -#include -#include -#include // for std::data, std::size -#include -#include +import std; +import std.compat; export module xxh3; From 94d0577b2cc9e5410821462076b38b43a192209e Mon Sep 17 00:00:00 2001 From: Ezekiel Warren Date: Mon, 2 Sep 2024 13:25:53 -0700 Subject: [PATCH 5/9] chore: add xxh3 to build.sh --- build.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/build.sh b/build.sh index aca435b..41f3895 100755 --- a/build.sh +++ b/build.sh @@ -161,6 +161,21 @@ if ! [ -f $MODULES_DIR/dylib.pcm ]; then cd $ROOT_DIR fi +if ! [ -f $MODULES_DIR/xxh3.pcm ]; then + log_info "compiling xxh3 module..." + + $CPP2B_COMPILER \ + -stdlib=libc++ \ + -std=c++23 \ + -fexperimental-library \ + -isystem $LIBCXX_INCLUDE_DIR/c++/v1 \ + -fprebuilt-module-path=$MODULES_DIR \ + "$ROOT_DIR/src/xxh3.cppm" \ + --precompile -o $MODULES_DIR/xxh3.pcm + + cd $ROOT_DIR +fi + if ! [ -f $MODULES_DIR/nlohmann.json.pcm ]; then log_info "compiling nlohmann.json module..." From 3b1a901e4f80a3b058861202ea3053366581b050 Mon Sep 17 00:00:00 2001 From: Ezekiel Warren Date: Sun, 22 Sep 2024 20:59:32 -0700 Subject: [PATCH 6/9] chore: wip --- src/main.cpp2 | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/main.cpp2 b/src/main.cpp2 index 89cc86a..657d7f4 100644 --- a/src/main.cpp2 +++ b/src/main.cpp2 @@ -702,10 +702,15 @@ unix_build_build_script_cmd: (compiler_cmd: std::string, info: cpp2b_source_buil build_build_script: (info: cpp2b_source_build_info) -> build_binary_result = { compiler :== cpp2b::compiler(); bin_outpath := fs::absolute(".cache/cpp2/bin") / fs::path(info.src).replace_extension(shared_library_extension()); + bin_cache_file_path := fs::path(bin_outpath.generic_string() + ".cpp2bcache") log_path := fs::absolute(".cache/cpp2/log/compile") / fs::path(info.src).replace_extension(".log"); ensure_dir(log_path.parent_path()); ensure_dir(bin_outpath.parent_path()); + if fs::exists(bin_outpath) && fs::exists(bin_cache_file_path) { + bin_cache_file: std::fstream = (bin_cache_file_path, std::ios::binary); + } + d := fs::absolute(modules_dir()); cmd_str: std::string = ""; if compiler == cpp2b::compiler_type::msvc { cmd_str = cl_build_build_script_cmd(info, bin_outpath); } @@ -855,21 +860,21 @@ do_build: (targets: std::vector) -> (stuff: full_build_info, exit_c file_hashes.insert(std::make_pair(p, hash)); } - data_file: std::fstream = (".cache/cpp2/.data", std::ios::binary | std::ios::in); + hash_data_file: std::fstream = (".cache/cpp2/.hash", std::ios::binary | std::ios::in); - while data_file { + while hash_data_file { path_length: u16 = 0; - data_file >> path_length; - if !data_file { break; } + hash_data_file >> path_length; + if !hash_data_file { break; } p: std::string = ""; p.resize(path_length); - data_file.read(p.data(), path_length); - if !data_file { break; } + hash_data_file.read(p.data(), path_length); + if !hash_data_file { break; } path_hash: u64 = 0; - data_file.read(reinterpret_cast<*char>(path_hash&), 8); - if !data_file { break; } + hash_data_file.read(reinterpret_cast<*char>(path_hash&), 8); + if !hash_data_file { break; } prev_file_hashes[fs::path(p)] = path_hash; } @@ -889,16 +894,16 @@ do_build: (targets: std::vector) -> (stuff: full_build_info, exit_c } } - data_file.close(); - data_file.open(".cache/cpp2/.data", std::ios::binary | std::ios::out | std::ios::trunc); + hash_data_file.close(); + hash_data_file.open(".cache/cpp2/.data", std::ios::binary | std::ios::out | std::ios::trunc); for file_hashes do(inout entry) { p := entry.first.generic_string(); hash := entry.second; - data_file << unsafe_cast(p.size()); - data_file.write(p.data(), p.size()); - data_file.write(reinterpret_cast<*char>(hash&), 8); + hash_data_file << unsafe_cast(p.size()); + hash_data_file.write(p.data(), p.size()); + hash_data_file.write(reinterpret_cast<*char>(hash&), 8); } for transpile_futures do(inout fut) { @@ -939,7 +944,6 @@ do_build: (targets: std::vector) -> (stuff: full_build_info, exit_c std::pair("std.compat", true), ); - for cpp1_module_source_files do(src_file: fs::path) { result := cpp2b_parse_cpp1_module_statements(std::ifstream(src_file)); From d9122d6c20f4e814e8dff8ae0916a7a51394ffd4 Mon Sep 17 00:00:00 2001 From: Ezekiel Warren Date: Mon, 23 Sep 2024 22:28:14 -0700 Subject: [PATCH 7/9] fix: wrong file for hash --- src/main.cpp2 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main.cpp2 b/src/main.cpp2 index 657d7f4..e129114 100644 --- a/src/main.cpp2 +++ b/src/main.cpp2 @@ -702,7 +702,7 @@ unix_build_build_script_cmd: (compiler_cmd: std::string, info: cpp2b_source_buil build_build_script: (info: cpp2b_source_build_info) -> build_binary_result = { compiler :== cpp2b::compiler(); bin_outpath := fs::absolute(".cache/cpp2/bin") / fs::path(info.src).replace_extension(shared_library_extension()); - bin_cache_file_path := fs::path(bin_outpath.generic_string() + ".cpp2bcache") + bin_cache_file_path: fs::path = bin_outpath.generic_string() + ".cpp2bcache"; log_path := fs::absolute(".cache/cpp2/log/compile") / fs::path(info.src).replace_extension(".log"); ensure_dir(log_path.parent_path()); ensure_dir(bin_outpath.parent_path()); @@ -879,7 +879,7 @@ do_build: (targets: std::vector) -> (stuff: full_build_info, exit_c prev_file_hashes[fs::path(p)] = path_hash; } - for file_hashes do(inout entry) { + for file_hashes do(entry) { p := entry.first; hash := entry.second; @@ -895,7 +895,7 @@ do_build: (targets: std::vector) -> (stuff: full_build_info, exit_c } hash_data_file.close(); - hash_data_file.open(".cache/cpp2/.data", std::ios::binary | std::ios::out | std::ios::trunc); + hash_data_file.open(".cache/cpp2/.hash", std::ios::binary | std::ios::out | std::ios::trunc); for file_hashes do(inout entry) { p := entry.first.generic_string(); From 0e142d8c6b117c35110ad0547900082406c7410a Mon Sep 17 00:00:00 2001 From: Ezekiel Warren Date: Tue, 26 Nov 2024 22:07:18 -0800 Subject: [PATCH 8/9] fix: add missing popd --- build.cmd | 1 + 1 file changed, 1 insertion(+) diff --git a/build.cmd b/build.cmd index 889b26a..5be1251 100644 --- a/build.cmd +++ b/build.cmd @@ -112,6 +112,7 @@ cl /nologo ^ /reference "%modules_dir%\std.ifc" ^ /reference "%modules_dir%\std.compat.ifc" ^ /c /interface /TP "%root_dir%src\xxh3.cppm" > NUL +popd if %ERRORLEVEL% neq 0 ( echo ERROR: failed to compile xxh3 module From c71152b6b8c66770acf193df50eb87232b4b4bdc Mon Sep 17 00:00:00 2001 From: Ezekiel Warren Date: Tue, 26 Nov 2024 22:10:33 -0800 Subject: [PATCH 9/9] chore: unsafe -> unchecked --- src/main.cpp2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.cpp2 b/src/main.cpp2 index e129114..f43de32 100644 --- a/src/main.cpp2 +++ b/src/main.cpp2 @@ -766,7 +766,7 @@ contains_target: (targets, value: std::string) -> bool = { hash_file: (p: fs::path) -> u64 = { f: std::ifstream = (p, std::ios::binary | std::ios::ate); - size: size_t = unsafe_cast(f.tellg()); + size: size_t = unchecked_cast(f.tellg()); f.seekg(0, std::ios::beg); buffer: std::vector = (); buffer.resize(size); @@ -901,7 +901,7 @@ do_build: (targets: std::vector) -> (stuff: full_build_info, exit_c p := entry.first.generic_string(); hash := entry.second; - hash_data_file << unsafe_cast(p.size()); + hash_data_file << unchecked_cast(p.size()); hash_data_file.write(p.data(), p.size()); hash_data_file.write(reinterpret_cast<*char>(hash&), 8); }