From d6038e9dee113441e29a8ca54eebf44fbe7af172 Mon Sep 17 00:00:00 2001
From: o0Shark0o <1191322051@qq.com>
Date: Wed, 21 Jan 2026 16:17:40 +0800
Subject: [PATCH] feat(auto-annotation): sync tags and timestamps to datasets
and optimize visibility
---
.../domain/model/dataset/DatasetFile.java | 1 +
.../interfaces/dto/DatasetFileResponse.java | 4 +
.../resources/mappers/DatasetFileMapper.xml | 196 +++++++++---------
.../Detail/components/Overview.tsx | 105 ++++++++++
.../datamate/auto_annotation_worker.py | 111 +++++++++-
5 files changed, 314 insertions(+), 103 deletions(-)
diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/DatasetFile.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/DatasetFile.java
index 45a58a123..f164ee164 100644
--- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/DatasetFile.java
+++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/domain/model/dataset/DatasetFile.java
@@ -32,6 +32,7 @@ public class DatasetFile {
private Long fileSize; // bytes
private String checkSum;
private String tags;
+ private LocalDateTime tagsUpdatedAt;
private String metadata;
private String status; // UPLOADED, PROCESSING, COMPLETED, ERROR
private LocalDateTime uploadTime;
diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/DatasetFileResponse.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/DatasetFileResponse.java
index 020557135..9e7dfbeb9 100644
--- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/DatasetFileResponse.java
+++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/DatasetFileResponse.java
@@ -27,6 +27,10 @@ public class DatasetFileResponse {
private String description;
/** 文件路径 */
private String filePath;
+ /** 文件标签(JSON 字符串) */
+ private String tags;
+ /** 标签更新时间 */
+ private LocalDateTime tagsUpdatedAt;
/** 上传时间 */
private LocalDateTime uploadTime;
/** 最后更新时间 */
diff --git a/backend/services/data-management-service/src/main/resources/mappers/DatasetFileMapper.xml b/backend/services/data-management-service/src/main/resources/mappers/DatasetFileMapper.xml
index f5c6a1ed9..597886cc1 100644
--- a/backend/services/data-management-service/src/main/resources/mappers/DatasetFileMapper.xml
+++ b/backend/services/data-management-service/src/main/resources/mappers/DatasetFileMapper.xml
@@ -1,98 +1,98 @@
-
-
-
-
- id, dataset_id, file_name, file_path, file_type, file_size, check_sum, tags, metadata, status,
- upload_time, last_access_time, created_at, updated_at
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- UPDATE t_dm_dataset_files
- SET file_name = #{fileName},
- file_path = #{filePath},
- file_type = #{fileType},
- file_size = #{fileSize},
- upload_time = #{uploadTime},
- last_access_time = #{lastAccessTime},
- status = #{status}
- WHERE id = #{id}
-
-
-
- DELETE FROM t_dm_dataset_files WHERE id = #{id}
-
-
+
+
+
+
+ id, dataset_id, file_name, file_path, file_type, file_size, check_sum, tags, tags_updated_at, metadata, status,
+ upload_time, last_access_time, created_at, updated_at
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ UPDATE t_dm_dataset_files
+ SET file_name = #{fileName},
+ file_path = #{filePath},
+ file_type = #{fileType},
+ file_size = #{fileSize},
+ upload_time = #{uploadTime},
+ last_access_time = #{lastAccessTime},
+ status = #{status}
+ WHERE id = #{id}
+
+
+
+ DELETE FROM t_dm_dataset_files WHERE id = #{id}
+
+
diff --git a/frontend/src/pages/DataManagement/Detail/components/Overview.tsx b/frontend/src/pages/DataManagement/Detail/components/Overview.tsx
index 454cb1711..8983d4623 100644
--- a/frontend/src/pages/DataManagement/Detail/components/Overview.tsx
+++ b/frontend/src/pages/DataManagement/Detail/components/Overview.tsx
@@ -189,6 +189,64 @@ export default function Overview({ dataset, filesOperation, fetchDataset }) {
width: 200,
render: (text) => formatDateTime(text),
},
+ {
+ title: "标签",
+ dataIndex: "tags",
+ key: "tags",
+ width: 220,
+ render: (value: any, record: any) => {
+ const isDirectory = typeof record.id === "string" && record.id.startsWith("directory-");
+ if (isDirectory) return "-";
+
+ let raw = value;
+ if (!raw) return "-";
+
+ // 后端目前将 tags 作为 JSON 字符串存储在 t_dm_dataset_files.tags 中
+ // 这里尝试解析为 FileTag 数组结构 [{ type, from_name, values: { [type]: [...] } }]
+ if (typeof raw === "string") {
+ try {
+ raw = JSON.parse(raw);
+ } catch {
+ // 解析失败则直接展示原始字符串
+ return raw;
+ }
+ }
+
+ if (!Array.isArray(raw) || raw.length === 0) return "-";
+
+ const labels: string[] = [];
+ raw.forEach((tag: any) => {
+ const type = tag?.type;
+ const valuesObj = tag?.values || {};
+ const tagValues = (type && valuesObj[type]) || [];
+
+ if (Array.isArray(tagValues)) {
+ tagValues.forEach((item) => {
+ if (typeof item === "string" && !labels.includes(item)) {
+ labels.push(item);
+ }
+ });
+ } else if (typeof tagValues === "string" && !labels.includes(tagValues)) {
+ labels.push(tagValues);
+ }
+ });
+
+ if (!labels.length) return "-";
+ return labels.join(", ");
+ },
+ },
+ {
+ title: "标签更新时间",
+ dataIndex: "tagsUpdatedAt",
+ key: "tagsUpdatedAt",
+ width: 200,
+ render: (text: any, record: any) => {
+ const isDirectory = typeof record.id === "string" && record.id.startsWith("directory-");
+ if (isDirectory) return "-";
+ if (!text) return "-";
+ return formatDateTime(text);
+ },
+ },
{
title: "操作",
key: "action",
@@ -528,6 +586,53 @@ export default function Overview({ dataset, filesOperation, fetchDataset }) {
{previewFileDetail.status}
)}
+ {previewFileDetail?.tags && (
+
+ 标签:
+
+ {(() => {
+ let raw = previewFileDetail.tags as any;
+ if (!raw) return "-";
+
+ if (typeof raw === "string") {
+ try {
+ raw = JSON.parse(raw);
+ } catch {
+ return raw;
+ }
+ }
+
+ if (!Array.isArray(raw) || raw.length === 0) return "-";
+
+ const labels: string[] = [];
+ raw.forEach((tag: any) => {
+ const type = tag?.type;
+ const valuesObj = tag?.values || {};
+ const tagValues = (type && valuesObj[type]) || [];
+
+ if (Array.isArray(tagValues)) {
+ tagValues.forEach((item) => {
+ if (typeof item === "string" && !labels.includes(item)) {
+ labels.push(item);
+ }
+ });
+ } else if (typeof tagValues === "string" && !labels.includes(tagValues)) {
+ labels.push(tagValues);
+ }
+ });
+
+ if (!labels.length) return "-";
+ return labels.join(", ");
+ })()}
+
+
+ )}
+ {previewFileDetail?.tagsUpdatedAt && (
+
+ 标签更新时间:
+ {formatDateTime(previewFileDetail.tagsUpdatedAt)}
+
+ )}
{previewFileDetail?.uploadTime && (
上传时间:
diff --git a/runtime/python-executor/datamate/auto_annotation_worker.py b/runtime/python-executor/datamate/auto_annotation_worker.py
index 8ab2df412..a270c5c91 100644
--- a/runtime/python-executor/datamate/auto_annotation_worker.py
+++ b/runtime/python-executor/datamate/auto_annotation_worker.py
@@ -27,7 +27,7 @@
import uuid
from datetime import datetime
from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple, Set
from loguru import logger
from sqlalchemy import text
@@ -245,6 +245,77 @@ def _load_files_by_ids(file_ids: List[str]) -> List[Tuple[str, str, str]]:
return [(str(r[0]), str(r[1]), str(r[2])) for r in rows]
+def _build_file_tags_from_detections(detections: List[Dict[str, Any]]) -> Optional[List[Dict[str, Any]]]:
+ """根据检测结果构建 FileTag JSON 结构。
+
+ tags 字段在 DM 服务中被当作 List[FileTag] 解析,结构需与
+ backend `FileTag`/runtime `DatasetFileTag` 保持兼容:
+
+ [{
+ "id": "...", # 可选
+ "type": "labels", # 类型键
+ "from_name": "auto_annotation", # 来源
+ "values": {"labels": ["Person", "Car"]}
+ }]
+
+ 重复类别只保留一个。
+ """
+
+ if not detections:
+ return None
+
+ label_set: Set[str] = set()
+ for det in detections:
+ label = det.get("label")
+ if isinstance(label, str) and label:
+ label_set.add(label)
+
+ if not label_set:
+ return None
+
+ # 排序以保证结果稳定
+ labels = sorted(label_set)
+ return [
+ {
+ "id": None,
+ "type": "labels",
+ "from_name": "auto_annotation",
+ "values": {"labels": labels},
+ }
+ ]
+
+
+def _update_dataset_file_tags(file_id: str, tags: List[Dict[str, Any]]) -> None:
+ """将标签写入 t_dm_dataset_files.tags 并更新 tags_updated_at。"""
+
+ if not file_id:
+ return
+
+ try:
+ now = datetime.utcnow()
+ sql = text(
+ """
+ UPDATE t_dm_dataset_files
+ SET tags = :tags,
+ tags_updated_at = :tags_updated_at
+ WHERE id = :file_id
+ """
+ )
+ params = {
+ "file_id": file_id,
+ "tags": json.dumps(tags, ensure_ascii=False),
+ "tags_updated_at": now,
+ }
+ with SQLManager.create_connect() as conn:
+ conn.execute(sql, params)
+ except Exception as e: # pragma: no cover - 防御性日志
+ logger.error(
+ "Failed to update tags for dataset file {}: {}",
+ file_id,
+ e,
+ )
+
+
def _ensure_output_dir(output_dir: str) -> str:
"""确保输出目录及其 images/、annotations/ 子目录存在。"""
@@ -296,6 +367,8 @@ def _register_output_dataset(
output_dir: str,
output_dataset_name: str,
total_images: int,
+ *,
+ tags_by_filename: Optional[Dict[str, List[Dict[str, Any]]]] = None,
) -> None:
"""将自动标注结果注册到新建的数据集。"""
@@ -347,9 +420,9 @@ def _register_output_dataset(
insert_file_sql = text(
"""
INSERT INTO t_dm_dataset_files (
- id, dataset_id, file_name, file_path, file_type, file_size, status
+ id, dataset_id, file_name, file_path, file_type, file_size, status, tags, tags_updated_at
) VALUES (
- :id, :dataset_id, :file_name, :file_path, :file_type, :file_size, :status
+ :id, :dataset_id, :file_name, :file_path, :file_type, :file_size, :status, :tags, :tags_updated_at
)
"""
)
@@ -367,6 +440,9 @@ def _register_output_dataset(
for file_name, file_path, file_size in image_files:
ext = os.path.splitext(file_name)[1].lstrip(".").upper() or None
+ file_tags = None
+ if tags_by_filename:
+ file_tags = tags_by_filename.get(file_name)
conn.execute(
insert_file_sql,
{
@@ -377,6 +453,8 @@ def _register_output_dataset(
"file_type": ext,
"file_size": int(file_size),
"status": "ACTIVE",
+ "tags": json.dumps(file_tags, ensure_ascii=False) if file_tags else None,
+ "tags_updated_at": datetime.utcnow() if file_tags else None,
},
)
added_count += 1
@@ -393,6 +471,8 @@ def _register_output_dataset(
"file_type": ext,
"file_size": int(file_size),
"status": "ACTIVE",
+ "tags": None,
+ "tags_updated_at": None,
},
)
added_count += 1
@@ -477,7 +557,8 @@ def _process_single_task(task: Dict[str, Any]) -> None:
else:
all_files = _load_dataset_files(dataset_id)
- files = [(path, name) for _, path, name in all_files]
+ # all_files: List[(file_id, file_path, file_name)]
+ files = all_files
total_images = len(files)
if total_images == 0:
@@ -523,7 +604,10 @@ def _process_single_task(task: Dict[str, Any]) -> None:
processed = 0
detected_total = 0
- for file_path, file_name in files:
+ # 记录:文件名 -> FileTag JSON,用于给新输出数据集的文件打标签
+ tags_by_filename: Dict[str, List[Dict[str, Any]]] = {}
+
+ for file_id, file_path, file_name in files:
try:
sample = {
"image": file_path,
@@ -536,6 +620,22 @@ def _process_single_task(task: Dict[str, Any]) -> None:
detected_total += len(detections)
processed += 1
+ # 基于检测结果生成标签(按类别去重),并写回源数据集文件
+ file_tags = _build_file_tags_from_detections(detections)
+ if file_tags:
+ try:
+ _update_dataset_file_tags(file_id, file_tags)
+ # 使用源文件名作为 key,供输出数据集复用
+ base_name = os.path.basename(file_path)
+ tags_by_filename[base_name] = file_tags
+ except Exception as e:
+ logger.error(
+ "Failed to persist tags for file {} in dataset {}: {}",
+ file_id,
+ dataset_id,
+ e,
+ )
+
progress = int(processed * 100 / total_images) if total_images > 0 else 100
_update_task_status(
@@ -584,6 +684,7 @@ def _process_single_task(task: Dict[str, Any]) -> None:
output_dir=output_dir,
output_dataset_name=output_dataset_name,
total_images=total_images,
+ tags_by_filename=tags_by_filename,
)
except Exception as e: # pragma: no cover - 防御性日志
logger.error(