diff --git a/docs/source/pitfall.rst b/docs/source/pitfall.rst index 0c404007f..e044be07b 100644 --- a/docs/source/pitfall.rst +++ b/docs/source/pitfall.rst @@ -70,6 +70,54 @@ in the returned expression. Replacing ``auto tmp`` with ``xt::xarray tmp`` does not change anything, ``tmp`` is still an lvalue and thus captured by reference. +.. warning:: + + This issue is particularly subtle with reducer functions like :cpp:func:`xt::amax`, + :cpp:func:`xt::sum`, etc. Consider the following function: + + .. code:: + + template + xt::xtensor logSoftmax(const xt::xtensor &matrix) + { + xt::xtensor maxVals = xt::amax(matrix, {1}, xt::keep_dims); + auto shifted = matrix - maxVals; + auto expVals = xt::exp(shifted); + auto sumExp = xt::sum(expVals, {1}, xt::keep_dims); + return shifted - xt::log(sumExp); + } + + This function may produce incorrect results or crash, especially in optimized builds. + The issue is that ``shifted``, ``expVals``, and ``sumExp`` are all lazy expressions + that hold references to local variables. When the function returns, these local + variables are destroyed, and the returned expression contains dangling references. + + The fix is to use explicit container types to force evaluation: + + .. code:: + + template + xt::xtensor logSoftmax(const xt::xtensor &matrix) + { + xt::xtensor maxVals = xt::amax(matrix, {1}, xt::keep_dims); + xt::xtensor shifted = matrix - maxVals; + xt::xtensor expVals = xt::exp(shifted); + xt::xtensor sumExp = xt::sum(expVals, {1}, xt::keep_dims); + return shifted - xt::log(sumExp); + } + + Alternatively, you can use :cpp:func:`xt::eval` to force evaluation: + + .. code:: + + auto shifted = xt::eval(matrix - maxVals); + + Or use the immediate evaluation strategy for reducers: + + .. code:: + + auto sumExp = xt::sum(expVals, {1}, xt::evaluation_strategy::immediate | xt::keep_dims); + Random numbers not consistent ----------------------------- diff --git a/test/test_xmath.cpp b/test/test_xmath.cpp index 17bc57895..edaafb344 100644 --- a/test/test_xmath.cpp +++ b/test/test_xmath.cpp @@ -969,4 +969,50 @@ namespace xt EXPECT_TRUE(xt::allclose(expected, unwrapped)); } } + + // Test for GitHub issue #2871: Proper handling of intermediate results + // This test documents the correct way to use reducers with keep_dims + // when intermediate expressions are needed. + TEST(xmath, issue_2871_intermediate_result_handling) + { + // This test verifies the correct pattern for using reducers with + // intermediate results. Using 'auto' with lazy expressions can lead + // to dangling references when the function returns. + + // The CORRECT way: use explicit container types for intermediate results + auto logSoftmax_correct = [](const xt::xtensor& matrix) + { + xt::xtensor maxVals = xt::amax(matrix, {1}, xt::keep_dims); + xt::xtensor shifted = matrix - maxVals; + xt::xtensor expVals = xt::exp(shifted); + xt::xtensor sumExp = xt::sum(expVals, {1}, xt::keep_dims); + return xt::xtensor(shifted - xt::log(sumExp)); + }; + + // Alternative CORRECT way: use xt::eval for intermediate results + auto logSoftmax_eval = [](const xt::xtensor& matrix) + { + auto maxVals = xt::eval(xt::amax(matrix, {1}, xt::keep_dims)); + auto shifted = xt::eval(matrix - maxVals); + auto expVals = xt::eval(xt::exp(shifted)); + auto sumExp = xt::eval(xt::sum(expVals, {1}, xt::keep_dims)); + return xt::xtensor(shifted - xt::log(sumExp)); + }; + + // Test data + xt::xtensor input = {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}; + + // Both implementations should produce the same result + auto result1 = logSoftmax_correct(input); + auto result2 = logSoftmax_eval(input); + + EXPECT_TRUE(xt::allclose(result1, result2)); + + // Verify the result is a valid log-softmax (rows sum to 0 in log space) + // exp(log_softmax).sum(axis=1) should equal 1 + auto exp_result = xt::exp(result1); + auto row_sums = xt::sum(exp_result, {1}); + xt::xtensor expected_sums = {1.0, 1.0}; + EXPECT_TRUE(xt::allclose(row_sums, expected_sums)); + } }