From 3e75327e808498177c19db932cdfac2c5507ee7d Mon Sep 17 00:00:00 2001
From: Hyukjin Kwon <gurwls223@apache.org>
Date: Fri, 12 Dec 2025 10:27:58 +0900
Subject: [PATCH] GH-48672, GH-48465: [Python] Add an option for truncating
 intraday milliseconds in Date64

---
 python/pyarrow/array.pxi                      |  36 ++-
 python/pyarrow/includes/libarrow_python.pxd   |   4 +
 python/pyarrow/pandas_compat.py               |   5 +-
 python/pyarrow/scalar.pxi                     |   9 +-
 .../src/arrow/python/arrow_to_pandas.cc       |  40 +++-
 .../src/arrow/python/arrow_to_pandas.h        |   5 +
 .../src/arrow/python/numpy_to_arrow.cc        |  15 +-
 .../pyarrow/src/arrow/python/numpy_to_arrow.h |   4 +-
 .../src/arrow/python/python_to_arrow.cc       |   9 +-
 .../src/arrow/python/python_to_arrow.h        |   5 +
 python/pyarrow/src/arrow/python/type_traits.h |   1 +
 python/pyarrow/table.pxi                      |  19 +-
 python/pyarrow/tests/test_array.py            | 210 ++++++++++++++++++
 13 files changed, 331 insertions(+), 31 deletions(-)
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 575b628db3a..670a75fb202 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -27,7 +27,8 @@ cdef extern from "<variant>" namespace "std":
     T get[T](...)
 
 cdef _sequence_to_array(object sequence, object mask, object size,
-                        DataType type, CMemoryPool* pool, c_bool from_pandas):
+                        DataType type, CMemoryPool* pool, c_bool from_pandas,
+                        bint truncate_date64_time):
     cdef:
         int64_t c_size
         PyConversionOptions options
@@ -41,6 +42,7 @@ cdef _sequence_to_array(object sequence, object mask, object size,
 
     options.from_pandas = from_pandas
     options.ignore_timezone = os.environ.get('PYARROW_IGNORE_TIMEZONE', False)
+    options.truncate_date64_time = truncate_date64_time
 
     with nogil:
         chunked = GetResultValue(
@@ -81,7 +83,8 @@ cdef shared_ptr[CDataType] _ndarray_to_type(object values,
 
 
 cdef _ndarray_to_array(object values, object mask, DataType type,
-                       c_bool from_pandas, c_bool safe, CMemoryPool* pool):
+                       c_bool from_pandas, c_bool safe, CMemoryPool* pool,
+                       bint truncate_date64_time):
     cdef:
         shared_ptr[CChunkedArray] chunked_out
         shared_ptr[CDataType] c_type = _ndarray_to_type(values, type)
@@ -89,7 +92,7 @@ cdef _ndarray_to_array(object values, object mask, DataType type,
 
     with nogil:
         check_status(NdarrayToArrow(pool, values, mask, from_pandas,
-                                    c_type, cast_options, &chunked_out))
+                                    c_type, cast_options, truncate_date64_time, &chunked_out))
 
     if chunked_out.get().num_chunks() > 1:
         return pyarrow_wrap_chunked_array(chunked_out)
@@ -127,7 +130,7 @@ def _handle_arrow_array_protocol(obj, type, mask, size):
 
 
 def array(object obj, type=None, mask=None, size=None, from_pandas=None,
-          bint safe=True, MemoryPool memory_pool=None):
+          bint safe=True, MemoryPool memory_pool=None, bint truncate_date64_time=True):
     """
     Create pyarrow.Array instance from a Python object.
 
@@ -162,6 +165,10 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None,
     memory_pool : pyarrow.MemoryPool, optional
         If not passed, will allocate memory from the currently-set default
         memory pool.
+    truncate_date64_time : bool, default True
+        If True (default), truncate intraday milliseconds when converting Python
+        datetime objects to date64.
+        If False, preserve the full datetime including time components.
 
     Returns
     -------
@@ -313,7 +320,8 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None,
         elif (pandas_api.is_categorical(values) and
               type is not None and type.id != Type_DICTIONARY):
             result = _ndarray_to_array(
-                np.asarray(values), mask, type, c_from_pandas, safe, pool
+                np.asarray(
+                    values), mask, type, c_from_pandas, safe, pool, truncate_date64_time
             )
         elif pandas_api.is_categorical(values):
             if type is not None:
@@ -358,21 +366,22 @@ def array(object obj, type=None, mask=None, size=None, from_pandas=None,
                     values, obj.dtype, type)
             if type and type.id == _Type_RUN_END_ENCODED:
                 arr = _ndarray_to_array(
-                    values, mask, type.value_type, c_from_pandas, safe, pool)
+                    values, mask, type.value_type, c_from_pandas, safe, pool, truncate_date64_time)
                 result = _pc().run_end_encode(arr, run_end_type=type.run_end_type,
                                               memory_pool=memory_pool)
             else:
                 result = _ndarray_to_array(values, mask, type, c_from_pandas, safe,
-                                           pool)
+                                           pool, truncate_date64_time)
     else:
         if type and type.id == _Type_RUN_END_ENCODED:
             arr = _sequence_to_array(
-                obj, mask, size, type.value_type, pool, from_pandas)
+                obj, mask, size, type.value_type, pool, from_pandas, truncate_date64_time)
             result = _pc().run_end_encode(arr, run_end_type=type.run_end_type,
                                           memory_pool=memory_pool)
         # ConvertPySequence does strict conversion if type is explicitly passed
         else:
-            result = _sequence_to_array(obj, mask, size, type, pool, c_from_pandas)
+            result = _sequence_to_array(
+                obj, mask, size, type, pool, c_from_pandas, truncate_date64_time)
 
     if extension_type is not None:
         result = ExtensionArray.from_storage(extension_type, result)
@@ -880,7 +889,8 @@ cdef class _PandasConvertible(_Weakrefable):
             bint self_destruct=False,
             str maps_as_pydicts=None,
             types_mapper=None,
-            bint coerce_temporal_nanoseconds=False
+            bint coerce_temporal_nanoseconds=False,
+            bint truncate_date64_time=False
     ):
         """
         Convert to a pandas-compatible NumPy array or DataFrame, as appropriate
@@ -965,6 +975,10 @@ cdef class _PandasConvertible(_Weakrefable):
             default behavior in pandas version 1.x. Set this option to True if
             you'd like to use this coercion when using pandas version >= 2.0
             for backwards compatibility (not recommended otherwise).
+        truncate_date64_time : bool, default False
+            If True, truncate intraday milliseconds when converting date64 to pandas
+            datetime.
+            If False (default), preserve the full datetime including time components.
 
         Returns
         -------
@@ -1041,6 +1055,7 @@ cdef class _PandasConvertible(_Weakrefable):
             split_blocks=split_blocks,
             self_destruct=self_destruct,
             maps_as_pydicts=maps_as_pydicts,
+            truncate_date64_time=truncate_date64_time,
             coerce_temporal_nanoseconds=coerce_temporal_nanoseconds
         )
         return self._to_pandas(options, categories=categories,
@@ -1063,6 +1078,7 @@ cdef PandasOptions _convert_pandas_options(dict options):
     result.self_destruct = options['self_destruct']
     result.coerce_temporal_nanoseconds = options['coerce_temporal_nanoseconds']
     result.ignore_timezone = os.environ.get('PYARROW_IGNORE_TIMEZONE', False)
+    result.truncate_date64_time = options['truncate_date64_time']
 
     maps_as_pydicts = options['maps_as_pydicts']
     if maps_as_pydicts is None:
diff --git a/python/pyarrow/includes/libarrow_python.pxd b/python/pyarrow/includes/libarrow_python.pxd
index 4724c52ccb5..ef51f90efa3 100644
--- a/python/pyarrow/includes/libarrow_python.pxd
+++ b/python/pyarrow/includes/libarrow_python.pxd
@@ -66,6 +66,7 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
         c_bool from_pandas
         c_bool ignore_timezone
         c_bool strict
+        c_bool truncate_date64_time
 
     # TODO Some functions below are not actually "nogil"
 
@@ -81,12 +82,13 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
     CStatus NdarrayToArrow(CMemoryPool* pool, object ao, object mo,
                            c_bool from_pandas,
                            const shared_ptr[CDataType]& type,
                            shared_ptr[CChunkedArray]* out)
 
     CStatus NdarrayToArrow(CMemoryPool* pool, object ao, object mo,
                            c_bool from_pandas,
                            const shared_ptr[CDataType]& type,
                            const CCastOptions& cast_options,
+                           c_bool truncate_date64_time,
                            shared_ptr[CChunkedArray]* out)
 
     CStatus NdarrayToTensor(CMemoryPool* pool, object ao,
@@ -193,6 +195,7 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
         c_bool coerce_temporal_nanoseconds
         c_bool ignore_timezone
         c_bool deduplicate_objects
+        c_bool truncate_date64_time
         c_bool safe_cast
         c_bool split_blocks
         c_bool self_destruct
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index dfed76d3711..dc0d6c35bc9 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -597,7 +597,7 @@ def dataframe_to_types(df, preserve_index, columns=None):
 
 
 def dataframe_to_arrays(df, schema, preserve_index, nthreads=1, columns=None,
-                        safe=True):
+                        safe=True, truncate_date64_time=True):
     (all_names,
      column_names,
      column_field_names,
@@ -630,7 +630,8 @@ def convert_column(col, field):
             type_ = field.type
 
         try:
-            result = pa.array(col, type=type_, from_pandas=True, safe=safe)
+            result = pa.array(col, type=type_, from_pandas=True, safe=safe,
+                              truncate_date64_time=truncate_date64_time)
         except (pa.ArrowInvalid,
                 pa.ArrowNotImplementedError,
                 pa.ArrowTypeError) as e:
diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi
index 83cabcf447d..7633dd30e45 100644
--- a/python/pyarrow/scalar.pxi
+++ b/python/pyarrow/scalar.pxi
@@ -1598,7 +1598,8 @@ cdef object get_scalar_class_from_type(
         return _scalar_classes[data_type.id()]
 
 
-def scalar(value, type=None, *, from_pandas=None, MemoryPool memory_pool=None):
+def scalar(value, type=None, *, from_pandas=None, MemoryPool memory_pool=None,
+           bint truncate_date64_time=True):
     """
     Create a pyarrow.Scalar instance from a Python object.
 
@@ -1616,6 +1617,10 @@ def scalar(value, type=None, *, from_pandas=None, MemoryPool memory_pool=None):
     memory_pool : pyarrow.MemoryPool, optional
         If not passed, will allocate memory from the currently-set default
         memory pool.
+    truncate_date64_time : bool, default True
+        If True (default), truncate intraday milliseconds when converting Python
+        datetime objects to date64.
+        If False, preserve the full datetime including time components.
 
     Returns
     -------
@@ -1668,6 +1673,8 @@ def scalar(value, type=None, *, from_pandas=None, MemoryPool memory_pool=None):
     else:
         options.from_pandas = from_pandas
 
+    options.truncate_date64_time = truncate_date64_time
+
     value = [value]
     with nogil:
         chunked = GetResultValue(ConvertPySequence(value, None, options, pool))
diff --git a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc
index ed4f394362a..52fffff9af4 100644
--- a/python/pyarrow/src/arrow/python/arrow_to_pandas.cc
+++ b/python/pyarrow/src/arrow/python/arrow_to_pandas.cc
@@ -1547,6 +1547,26 @@ void ConvertDatesShift(const ChunkedArray& data, int64_t* out_values) {
   }
 }
 
+// Floor each non-null date64 value to midnight of its day, then scale by SHIFT.
+template <int64_t SHIFT>
+inline void ConvertDatetimeWithTruncation(const ChunkedArray& data, int64_t* out_values) {
+  for (int c = 0; c < data.num_chunks(); c++) {
+    const auto& arr = *data.chunk(c);
+    const int64_t* in_values = GetPrimitiveValues<int64_t>(arr);
+    for (int64_t i = 0; i < arr.length(); ++i) {
+      if (arr.IsNull(i)) {
+        *out_values++ = kPandasTimestampNull;
+        continue;
+      }
+      // C++ `%` truncates toward zero; shift a negative remainder so pre-epoch
+      // values floor to their own day instead of rounding up to the next one.
+      int64_t rem = in_values[i] % kMillisecondsInDay;
+      if (rem < 0) rem += kMillisecondsInDay;
+      *out_values++ = (in_values[i] - rem) * SHIFT;
+    }
+  }
+}
+
 class DatetimeDayWriter : public TypedPandasWriter<NPY_DATETIME> {
  public:
   using TypedPandasWriter<NPY_DATETIME>::TypedPandasWriter;
@@ -1617,7 +1637,14 @@ class DatetimeMilliWriter : public DatetimeWriter<TimeUnit::MILLI> {
       // Convert from days since epoch to datetime64[ms]
       ConvertDatetime<int32_t, 86400000L>(*data, out_values);
     } else if (type == Type::DATE64) {
-      ConvertNumericNullable<int64_t>(*data, kPandasTimestampNull, out_values);
+      // Date64Type is millisecond timestamp
+      if (this->options_.truncate_date64_time) {
+        // Truncate intraday milliseconds
+        ConvertDatetimeWithTruncation<1L>(*data, out_values);
+      } else {
+        // Preserve time components
+        ConvertNumericNullable<int64_t>(*data, kPandasTimestampNull, out_values);
+      }
     } else {
       const auto& ts_type = checked_cast<const TimestampType&>(*data->type());
       ARROW_DCHECK_EQ(TimeUnit::MILLI, ts_type.unit())
@@ -1652,9 +1679,14 @@ class DatetimeNanoWriter : public DatetimeWriter<TimeUnit::NANO> {
       // Convert from days since epoch to datetime64[ns]
       ConvertDatetime<int32_t, kNanosecondsInDay>(*data, out_values);
     } else if (type == Type::DATE64) {
-      // Date64Type is millisecond timestamp stored as int64_t
-      // TODO(wesm): Do we want to make sure to zero out the milliseconds?
-      ConvertDatetime<int64_t, 1000000L>(*data, out_values);
+      // Date64Type is millisecond timestamp; convert to nanoseconds
+      if (this->options_.truncate_date64_time) {
+        // Truncate intraday milliseconds and convert to nanoseconds
+        ConvertDatetimeWithTruncation<1000000L>(*data, out_values);
+      } else {
+        // Preserve time components and convert to nanoseconds
+        ConvertDatetime<int64_t, 1000000L>(*data, out_values);
+      }
     } else if (type == Type::TIMESTAMP) {
       const auto& ts_type = checked_cast<const TimestampType&>(*data->type());
 
diff --git a/python/pyarrow/src/arrow/python/arrow_to_pandas.h b/python/pyarrow/src/arrow/python/arrow_to_pandas.h
index b4e91e6cf5a..c7dc65b84e1 100644
--- a/python/pyarrow/src/arrow/python/arrow_to_pandas.h
+++ b/python/pyarrow/src/arrow/python/arrow_to_pandas.h
@@ -89,6 +89,11 @@ struct PandasOptions {
   /// objects
   bool deduplicate_objects = false;
 
+  /// If true, truncate intraday milliseconds when converting date64 to pandas
+  /// datetime (default false to preserve time components).
+  /// If false, preserve the full datetime including time components.
+  bool truncate_date64_time = false;
+
   /// \brief For certain data types, a cast is needed in order to store the
   /// data in a pandas DataFrame or Series (e.g. timestamps are always stored
   /// as nanoseconds in pandas). This option controls whether it is a safe
diff --git a/python/pyarrow/src/arrow/python/numpy_to_arrow.cc b/python/pyarrow/src/arrow/python/numpy_to_arrow.cc
index 5647e895d0f..50d28a4e342 100644
--- a/python/pyarrow/src/arrow/python/numpy_to_arrow.cc
+++ b/python/pyarrow/src/arrow/python/numpy_to_arrow.cc
@@ -183,7 +183,7 @@ class NumPyConverter {
  public:
   NumPyConverter(MemoryPool* pool, PyObject* arr, PyObject* mo,
                  const std::shared_ptr<DataType>& type, bool from_pandas,
-                 const compute::CastOptions& cast_options = compute::CastOptions())
+                 const compute::CastOptions& cast_options, bool truncate_date64_time)
       : pool_(pool),
         type_(type),
         arr_(reinterpret_cast<PyArrayObject*>(arr)),
@@ -191,6 +191,7 @@ class NumPyConverter {
         mask_(nullptr),
         from_pandas_(from_pandas),
         cast_options_(cast_options),
+        truncate_date64_time_(truncate_date64_time),
         null_bitmap_data_(nullptr),
         null_count_(0) {
     if (mo != nullptr && mo != Py_None) {
@@ -311,6 +312,7 @@ class NumPyConverter {
 
   bool from_pandas_;
   compute::CastOptions cast_options_;
+  bool truncate_date64_time_;
 
   // Used in visitor pattern
   ArrayVector out_arrays_;
@@ -330,6 +332,7 @@ Status NumPyConverter::Convert() {
     PyConversionOptions py_options;
     py_options.type = type_;
     py_options.from_pandas = from_pandas_;
+    py_options.truncate_date64_time = truncate_date64_time_;
     ARROW_ASSIGN_OR_RAISE(
         auto chunked_array,
         ConvertPySequence(reinterpret_cast<PyObject*>(arr_),
@@ -845,7 +848,7 @@ Status NumPyConverter::Visit(const StructType& type) {
       RETURN_IF_PYERROR();
       sub_arrays.emplace_back(sub_array);
       sub_converters.emplace_back(pool_, sub_array, nullptr /* mask */, field->type(),
-                                  from_pandas_);
+                                  from_pandas_, cast_options_, truncate_date64_time_);
     }
   }
 
@@ -916,7 +919,7 @@ Status NumPyConverter::Visit(const StructType& type) {
 
 Status NdarrayToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo, bool from_pandas,
                       const std::shared_ptr<DataType>& type,
-                      const compute::CastOptions& cast_options,
+                      const compute::CastOptions& cast_options, bool truncate_date64_time,
                       std::shared_ptr<ChunkedArray>* out) {
   if (!PyArray_Check(ao)) {
     // This code path cannot be reached by Python unit tests currently so this
@@ -927,7 +930,8 @@ Status NdarrayToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo, bool from_pa
     return Status::Invalid("only handle 1-dimensional arrays");
   }
 
-  NumPyConverter converter(pool, ao, mo, type, from_pandas, cast_options);
+  NumPyConverter converter(pool, ao, mo, type, from_pandas, cast_options,
+                           truncate_date64_time);
   RETURN_NOT_OK(converter.Convert());
   const auto& output_arrays = converter.result();
   ARROW_DCHECK_GT(output_arrays.size(), 0);
@@ -938,7 +942,8 @@ Status NdarrayToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo, bool from_pa
 Status NdarrayToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo, bool from_pandas,
                       const std::shared_ptr<DataType>& type,
                       std::shared_ptr<ChunkedArray>* out) {
-  return NdarrayToArrow(pool, ao, mo, from_pandas, type, compute::CastOptions(), out);
+  return NdarrayToArrow(pool, ao, mo, from_pandas, type, compute::CastOptions(),
+                        /*truncate_date64_time=*/true, out);
 }
 
 }  // namespace py
diff --git a/python/pyarrow/src/arrow/python/numpy_to_arrow.h b/python/pyarrow/src/arrow/python/numpy_to_arrow.h
index b6cd093e554..315fc6d535f 100644
--- a/python/pyarrow/src/arrow/python/numpy_to_arrow.h
+++ b/python/pyarrow/src/arrow/python/numpy_to_arrow.h
@@ -46,11 +46,13 @@ namespace py {
 /// whether values are null
 /// \param[in] type a specific type to cast to, may be null
 /// \param[in] cast_options casting options
+/// \param[in] truncate_date64_time if true, truncate intraday milliseconds when
+/// converting Python datetime objects to date64; if false, preserve them
 /// \param[out] out a ChunkedArray, to accommodate chunked output
 ARROW_PYTHON_EXPORT
 Status NdarrayToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo, bool from_pandas,
                       const std::shared_ptr<DataType>& type,
-                      const compute::CastOptions& cast_options,
+                      const compute::CastOptions& cast_options, bool truncate_date64_time,
                       std::shared_ptr<ChunkedArray>* out);
 
 /// Safely convert NumPy arrays to Arrow. If target data type is not known,
diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc
index 139eb1d7f4f..3e883832c83 100644
--- a/python/pyarrow/src/arrow/python/python_to_arrow.cc
+++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc
@@ -303,14 +303,15 @@ class PyValue {
     return value;
   }
 
-  static Result<int64_t> Convert(const Date64Type*, const O&, I obj) {
+  static Result<int64_t> Convert(const Date64Type*, const O& options, I obj) {
     int64_t value;
     if (PyDateTime_Check(obj)) {
       auto pydate = reinterpret_cast<PyDateTime_DateTime*>(obj);
       value = internal::PyDateTime_to_ms(pydate);
-      // Truncate any intraday milliseconds
-      // TODO: introduce an option for this
-      value -= value % 86400000LL;
+      // Truncate any intraday milliseconds if the option is enabled
+      if (options.truncate_date64_time) {
+        value -= value % 86400000LL;
+      }
     } else if (PyDate_Check(obj)) {
       auto pydate = reinterpret_cast<PyDateTime_Date*>(obj);
       value = internal::PyDate_to_ms(pydate);
diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.h b/python/pyarrow/src/arrow/python/python_to_arrow.h
index d167996ba8d..aa61bbe81fa 100644
--- a/python/pyarrow/src/arrow/python/python_to_arrow.h
+++ b/python/pyarrow/src/arrow/python/python_to_arrow.h
@@ -59,6 +59,11 @@ struct PyConversionOptions {
   bool ignore_timezone = false;
 
   bool strict = false;
+
+  /// If true, truncate intraday milliseconds when converting Python datetime
+  /// objects to date64 (default true for backwards compatibility).
+  /// If false, preserve the full datetime including time components.
+  bool truncate_date64_time = true;
 };
 
 /// \brief Convert sequence (list, generator, NumPy array with dtype object) of
diff --git a/python/pyarrow/src/arrow/python/type_traits.h b/python/pyarrow/src/arrow/python/type_traits.h
index 865e1af4276..a467dcd408b 100644
--- a/python/pyarrow/src/arrow/python/type_traits.h
+++ b/python/pyarrow/src/arrow/python/type_traits.h
@@ -34,6 +34,7 @@ namespace arrow {
 namespace py {
 
 static constexpr int64_t kPandasTimestampNull = std::numeric_limits<int64_t>::min();
+constexpr int64_t kMillisecondsInDay = 86400000LL;
 constexpr int64_t kNanosecondsInDay = 86400000000000LL;
 
 namespace internal {
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 9136f252980..2037c11edaa 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -3366,7 +3366,7 @@ cdef class RecordBatch(_Tabular):
 
     @classmethod
     def from_pandas(cls, df, Schema schema=None, preserve_index=None,
-                    nthreads=None, columns=None):
+                    nthreads=None, columns=None, bint truncate_date64_time=True):
         """
         Convert pandas.DataFrame to an Arrow RecordBatch
 
@@ -3392,6 +3392,10 @@ cdef class RecordBatch(_Tabular):
             :func:`pyarrow.cpu_count` (may use up to system CPU count threads).
         columns : list, optional
            List of column to be converted. If None, use all columns.
+        truncate_date64_time : bool, default True
+            If True (default), truncate intraday milliseconds when converting Python
+            datetime objects to date64.
+            If False, preserve the full datetime including time components.
 
         Returns
         -------
@@ -3448,7 +3452,8 @@ cdef class RecordBatch(_Tabular):
         """
         from pyarrow.pandas_compat import dataframe_to_arrays
         arrays, schema, n_rows = dataframe_to_arrays(
-            df, schema, preserve_index, nthreads=nthreads, columns=columns
+            df, schema, preserve_index, nthreads=nthreads, columns=columns,
+            truncate_date64_time=truncate_date64_time
         )
 
         # If df is empty but row index is not, create empty RecordBatch with rows >0
@@ -4732,7 +4737,8 @@ cdef class Table(_Tabular):
 
     @classmethod
     def from_pandas(cls, df, Schema schema=None, preserve_index=None,
-                    nthreads=None, columns=None, bint safe=True):
+                    nthreads=None, columns=None, bint safe=True,
+                    bint truncate_date64_time=True):
         """
         Convert pandas.DataFrame to an Arrow Table.
 
@@ -4773,6 +4779,10 @@ cdef class Table(_Tabular):
            List of column to be converted. If None, use all columns.
         safe : bool, default True
            Check for overflows or other unsafe conversions.
+        truncate_date64_time : bool, default True
+            If True (default), truncate intraday milliseconds when converting Python
+            datetime objects to date64.
+            If False, preserve the full datetime including time components.
 
         Returns
         -------
@@ -4799,7 +4809,8 @@ cdef class Table(_Tabular):
             preserve_index=preserve_index,
             nthreads=nthreads,
             columns=columns,
-            safe=safe
+            safe=safe,
+            truncate_date64_time=truncate_date64_time
         )
 
         # If df is empty but row index is not, create empty Table with rows >0
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index ec361159c5f..923843989e6 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -2216,6 +2216,216 @@ def test_date64_from_builtin_datetime():
     assert as_i8[0].as_py() == as_i8[1].as_py()
 
 
+def test_date64_truncate_date64_time_option():
+    # Default: a datetime with a time part is stored as midnight of its day
+    dt_with_time = datetime.datetime(2000, 1, 1, 12, 34, 56, 123456)
+    dt_date_only = datetime.datetime(2000, 1, 1)
+
+    # Default behavior (truncate_date64_time=True)
+    result_default = pa.array([dt_with_time], type='date64')
+    result_date_only = pa.array([dt_date_only], type='date64')
+
+    # Both should be equal when truncated
+    assert result_default.equals(result_date_only)
+
+    # Verify the underlying int64 values are the same
+    as_i8_default = result_default.view('int64')
+    as_i8_date_only = result_date_only.view('int64')
+    assert as_i8_default[0].as_py() == as_i8_date_only[0].as_py()
+
+    # Test with truncate_date64_time=False: preserve time components
+    result_preserve = pa.array([dt_with_time], type='date64',
+                               truncate_date64_time=False)
+    result_preserve_date_only = pa.array(
+        [dt_date_only], type='date64', truncate_date64_time=False)
+
+    # These should not be equal when time is preserved
+    assert not result_preserve.equals(result_preserve_date_only)
+
+    # Verify the underlying int64 values are different
+    as_i8_preserve = result_preserve.view('int64')
+    as_i8_preserve_date_only = result_preserve_date_only.view('int64')
+    assert as_i8_preserve[0].as_py() != as_i8_preserve_date_only[0].as_py()
+
+    # With the time kept, the stored value lies after midnight of that day
+    assert as_i8_preserve[0].as_py() > as_i8_preserve_date_only[0].as_py()
+
+
+def test_scalar_date64_truncate_date64_time_option():
+    # pa.scalar: truncate to midnight by default, keep the time on request
+    dt_with_time = datetime.datetime(2000, 1, 1, 12, 34, 56, 123456)
+    dt_date_only = datetime.datetime(2000, 1, 1)
+
+    # Default behavior (truncate_date64_time=True)
+    scalar_default = pa.scalar(dt_with_time, type=pa.date64())
+    scalar_date_only = pa.scalar(dt_date_only, type=pa.date64())
+
+    # Both should be equal when truncated
+    assert scalar_default.equals(scalar_date_only)
+
+    # Test with truncate_date64_time=False: preserve time components
+    scalar_preserve = pa.scalar(
+        dt_with_time, type=pa.date64(), truncate_date64_time=False)
+    scalar_preserve_date_only = pa.scalar(
+        dt_date_only, type=pa.date64(), truncate_date64_time=False)
+
+    # These should not be equal when time is preserved
+    assert not scalar_preserve.equals(scalar_preserve_date_only)
+
+
+@pytest.mark.pandas
+def test_date64_from_pandas_with_truncate_date64_time():
+    pd = pytest.importorskip("pandas")
+
+    # Create pandas Series with Python native datetime objects (object dtype)
+    dt_with_time = datetime.datetime(2000, 1, 1, 12, 34, 56, 123456)
+    dt_date_only = datetime.datetime(2000, 1, 1)
+
+    series_with_time = pd.Series([dt_with_time], dtype=object)
+    series_date_only = pd.Series([dt_date_only], dtype=object)
+
+    # Test default behavior: truncate time
+    # (from_pandas=True, default truncate_date64_time=True)
+    arr_with_time_default = pa.array(series_with_time, type=pa.date64(),
+                                     from_pandas=True)
+    arr_date_only_default = pa.array(series_date_only, type=pa.date64(),
+                                     from_pandas=True)
+
+    # Both should be equal when truncated
+    assert arr_with_time_default.equals(arr_date_only_default)
+
+    # Verify underlying int64 values are the same
+    as_i8_with_time = arr_with_time_default.view('int64')
+    as_i8_date_only = arr_date_only_default.view('int64')
+    assert as_i8_with_time[0].as_py() == as_i8_date_only[0].as_py()
+
+    # Test with truncate_date64_time=False: preserve time components
+    # This verifies that from_pandas and truncate_date64_time work together correctly
+    arr_with_time_preserve = pa.array(series_with_time, type=pa.date64(),
+                                      from_pandas=True, truncate_date64_time=False)
+    arr_date_only_preserve = pa.array(series_date_only, type=pa.date64(),
+                                      from_pandas=True, truncate_date64_time=False)
+
+    # These should not be equal when time is preserved
+    assert not arr_with_time_preserve.equals(arr_date_only_preserve)
+
+    # Verify underlying int64 values are different
+    as_i8_with_time_preserve = arr_with_time_preserve.view('int64')
+    as_i8_date_only_preserve = arr_date_only_preserve.view('int64')
+    assert as_i8_with_time_preserve[0].as_py() != as_i8_date_only_preserve[0].as_py()
+
+    # With the time kept, the stored value lies after midnight of that day
+    assert as_i8_with_time_preserve[0].as_py() > as_i8_date_only_preserve[0].as_py()
+
+    # A plain list (from_pandas=False) and an object Series (from_pandas=True)
+    # must give the same array when truncate_date64_time=False
+    arr_with_time_no_pandas = pa.array([dt_with_time], type=pa.date64(),
+                                       from_pandas=False, truncate_date64_time=False)
+    arr_with_time_pandas = pa.array(series_with_time, type=pa.date64(),
+                                    from_pandas=True, truncate_date64_time=False)
+
+    # Both should produce the same result when truncate_date64_time=False
+    assert arr_with_time_no_pandas.equals(arr_with_time_pandas)
+
+
+def test_date64_numpy_array_truncate_date64_time_option():
+    np = pytest.importorskip("numpy")
+
+    # Create NumPy array with object dtype containing Python datetime objects
+    dt_with_time = datetime.datetime(2000, 1, 1, 12, 34, 56, 123456)
+    dt_date_only = datetime.datetime(2000, 1, 1)
+
+    arr_with_time = np.array([dt_with_time], dtype=object)
+    arr_date_only = np.array([dt_date_only], dtype=object)
+
+    # Test default behavior: object-dtype NumPy input is truncated by default
+    # (pa.array() defaults to truncate_date64_time=True)
+    arr_with_time_default = pa.array(arr_with_time, type=pa.date64())
+    arr_date_only_default = pa.array(arr_date_only, type=pa.date64())
+
+    # These should be equal because both are truncated to midnight
+    assert arr_with_time_default.equals(arr_date_only_default)
+
+    # Verify underlying int64 values are the same (truncated)
+    as_i8_with_time = arr_with_time_default.view('int64')
+    as_i8_date_only = arr_date_only_default.view('int64')
+    assert as_i8_with_time[0].as_py() == as_i8_date_only[0].as_py()
+
+    # Test explicit truncate_date64_time=False: should preserve time
+    arr_with_time_preserve = pa.array(arr_with_time, type=pa.date64(),
+                                      truncate_date64_time=False)
+    arr_date_only_preserve = pa.array(arr_date_only, type=pa.date64(),
+                                      truncate_date64_time=False)
+
+    # These should not be equal when time is preserved
+    assert not arr_with_time_preserve.equals(arr_date_only_preserve)
+
+    # Verify underlying int64 values are different when time is preserved
+    as_i8_with_time_preserve = arr_with_time_preserve.view('int64')
+    as_i8_date_only_preserve = arr_date_only_preserve.view('int64')
+    assert as_i8_with_time_preserve[0].as_py() != as_i8_date_only_preserve[0].as_py()
+    assert as_i8_with_time_preserve[0].as_py() > as_i8_date_only_preserve[0].as_py()
+
+
+@pytest.mark.pandas
+def test_date64_to_pandas_truncate_date64_time_option():
+    pd = pytest.importorskip("pandas")
+
+    # Build a date64 array from raw millisecond values, one with a time part
+    # 2018-05-10 00:00:00
+    milliseconds_at_midnight = 1525910400000
+    # 2018-05-10 00:02:03.456
+    milliseconds_with_time = milliseconds_at_midnight + 123456
+
+    arr = pa.array([milliseconds_at_midnight, milliseconds_with_time],
+                   type=pa.date64())
+
+    # Test default behavior: preserve time components (truncate_date64_time=False)
+    result_default = arr.to_pandas(date_as_object=False)
+    expected_default = pd.Series([
+        pd.Timestamp('2018-05-10 00:00:00'),
+        pd.Timestamp('2018-05-10 00:02:03.456'),
+    ], dtype='datetime64[ms]')
+    pd.testing.assert_series_equal(result_default, expected_default)
+
+    # Test with truncate_date64_time=True: truncate time components
+    result_truncated = arr.to_pandas(date_as_object=False, truncate_date64_time=True)
+    expected_truncated = pd.Series([
+        pd.Timestamp('2018-05-10 00:00:00'),
+        pd.Timestamp('2018-05-10 00:00:00'),
+    ], dtype='datetime64[ms]')
+    pd.testing.assert_series_equal(result_truncated, expected_truncated)
+
+    # Repeat with coerce_temporal_nanoseconds=True, expecting datetime64[ns]
+    result_ns_default = arr.to_pandas(date_as_object=False,
+                                      coerce_temporal_nanoseconds=True)
+    expected_ns_default = pd.Series([
+        pd.Timestamp('2018-05-10 00:00:00'),
+        pd.Timestamp('2018-05-10 00:02:03.456'),
+    ], dtype='datetime64[ns]')
+    pd.testing.assert_series_equal(result_ns_default, expected_ns_default)
+
+    result_ns_truncated = arr.to_pandas(date_as_object=False,
+                                        coerce_temporal_nanoseconds=True,
+                                        truncate_date64_time=True)
+    expected_ns_truncated = pd.Series([
+        pd.Timestamp('2018-05-10 00:00:00'),
+        pd.Timestamp('2018-05-10 00:00:00'),
+    ], dtype='datetime64[ns]')
+    pd.testing.assert_series_equal(result_ns_truncated, expected_ns_truncated)
+
+    # A ChunkedArray (one value per chunk) must match the single-array results
+    chunked = pa.chunked_array([[milliseconds_at_midnight],
+                                [milliseconds_with_time]],
+                               type=pa.date64())
+    result_chunked_default = chunked.to_pandas(date_as_object=False)
+    pd.testing.assert_series_equal(result_chunked_default, expected_default)
+
+    result_chunked_truncated = chunked.to_pandas(date_as_object=False,
+                                                 truncate_date64_time=True)
+    pd.testing.assert_series_equal(result_chunked_truncated, expected_truncated)
+
+
 @pytest.mark.parametrize(('ty', 'values'), [
     ('bool', [True, False, True]),
     ('uint8', range(0, 255)),