OrderLab · Essoz · Jan 4, 2026 · Jan 4, 2026 · Jan 5, 2026 · Jan 6, 2026
diff --git a/.github/workflows/eval-overhead-e2e.yml b/.github/workflows/eval-overhead-e2e.yml
@@ -6,13 +6,11 @@ on:
     paths:
       - '.github/workflows/**'
       - 'traincheck/instrumentor/**'
-      - 'traincheck/proxy_wrapper/**'
       - 'traincheck/collect_trace.py'
   pull_request:
     paths:
       - '.github/workflows/**'
       - 'traincheck/instrumentor/**'
-      - 'traincheck/proxy_wrapper/**'
       - 'traincheck/collect_trace.py'
 
 

diff --git a/docs/5-min-tutorial.md b/docs/5-min-tutorial.md
@@ -246,7 +246,7 @@ For example, the "`optimizer.zero_grad` did **not** reset `.grad` from non-zero
             "var_type": NaN,
             "mode": NaN,
             "dump_loc": NaN,
-            "attributes._ML_DAIKON_data_ID": NaN,
+            "attributes._TRAINCHECK_data_ID": NaN,
             "attributes.data": NaN,
             "attributes.dtype": NaN,
             "attributes.grad": NaN,
@@ -274,7 +274,7 @@ For example, the "`optimizer.zero_grad` did **not** reset `.grad` from non-zero
             "attributes.requires_grad": NaN,
             "attributes.retains_grad": NaN,
             "attributes.shape": NaN,
-            "attributes._ML_DAIKON_grad_ID": NaN,
+            "attributes._TRAINCHECK_grad_ID": NaN,
             "exception": NaN,
             "exception_msg": NaN,
             "proxy_obj_names": NaN

diff --git a/docs/ae-eval-s5.1-silent-issue-detection.md b/docs/ae-eval-s5.1-silent-issue-detection.md
@@ -145,9 +145,9 @@ diff --color -r checker_output/trace_pytorch-104336/failed.log reference_checker
 >             "process_id": 9591,
 >             "thread_id": 140324043503424,
 86c86
-<             "attributes._ML_DAIKON_data_ID": 140704882109040,
+<             "attributes._TRAINCHECK_data_ID": 140704882109040,
 ---
->             "attributes._ML_DAIKON_data_ID": 140317529048544,
+>             "attributes._TRAINCHECK_data_ID": 140317529048544,
 116,117c116,117
 <             "time": 2437523672783,
 <             "meta_vars._DATA_PARALLEL_RANK": 4.0,
@@ -161,9 +161,9 @@ diff --color -r checker_output/trace_pytorch-104336/failed.log reference_checker
 >             "process_id": 9747,
 >             "thread_id": 140028492969792,
 128c128
-<             "attributes._ML_DAIKON_data_ID": 140043703504144,
+<             "attributes._TRAINCHECK_data_ID": 140043703504144,
 ---
->             "attributes._ML_DAIKON_data_ID": 140021978318304,
+>             "attributes._TRAINCHECK_data_ID": 140021978318304,
 158,159c158,159
 <             "time": 2437502499438,
 <             "meta_vars._DATA_PARALLEL_RANK": 2.0,
@@ -182,9 +182,9 @@ diff --color -r checker_output/trace_pytorch-115607/failed.log reference_checker
 <             "exception_msg": NaN,
 <             "proxy_obj_names": NaN,
 113c110,113
-<             "attributes._ML_DAIKON_grad_ID": NaN
+<             "attributes._TRAINCHECK_grad_ID": NaN
 ---
->             "attributes._ML_DAIKON_grad_ID": NaN,
+>             "attributes._TRAINCHECK_grad_ID": NaN,
 >             "exception": NaN,
 >             "exception_msg": NaN,
 >             "proxy_obj_names": NaN
@@ -193,9 +193,9 @@ diff --color -r checker_output/trace_pytorch-115607/failed.log reference_checker
 <             "exception_msg": NaN,
 <             "proxy_obj_names": NaN,
 215c212,215
-<             "attributes._ML_DAIKON_grad_ID": NaN
+<             "attributes._TRAINCHECK_grad_ID": NaN
 ---
->             "attributes._ML_DAIKON_grad_ID": NaN,
+>             "attributes._TRAINCHECK_grad_ID": NaN,
 >             "exception": NaN,
 >             "exception_msg": NaN,
 >             "proxy_obj_names": NaN
@@ -210,9 +210,9 @@ diff --color -r checker_output/trace_pytorch-115607/failed.log reference_checker
 <             "exception_msg": NaN,
 <             "proxy_obj_names": NaN,
 331c328,331
-<             "attributes._ML_DAIKON_grad_ID": NaN
+<             "attributes._TRAINCHECK_grad_ID": NaN
 ---
->             "attributes._ML_DAIKON_grad_ID": NaN,
+>             "attributes._TRAINCHECK_grad_ID": NaN,
 >             "exception": NaN,
 >             "exception_msg": NaN,
 >             "proxy_obj_names": NaN
@@ -247,10 +247,10 @@ diff --color -r checker_output/trace_pytorch-51800/failed.log reference_checker_
 >             "time": 19876858668088743,
 >             "meta_vars.step": 0,
 89c70,89
-<             "attributes._ML_DAIKON_grad_ID": NaN
+<             "attributes._TRAINCHECK_grad_ID": NaN
 ---
 >             "type": "function_call (pre)",
->             "attributes._ML_DAIKON_grad_ID": NaN,
+>             "attributes._TRAINCHECK_grad_ID": NaN,
 >             "func_call_id": "b39a4a81b2c24473ba916ab1832fbf12_19876858668012869",
 >             "function": "torch.nn.modules.module.Module.eval",
 >             "is_bound_method": true,
@@ -290,9 +290,9 @@ diff --color -r checker_output/trace_x-jxmnop-ddp-out-of-sync/failed.log referen
 ---
 >             "meta_vars._DATA_PARALLEL_RANK": "1",
 87c87
-<             "attributes._ML_DAIKON_data_ID": 140656561409856,
+<             "attributes._TRAINCHECK_data_ID": 140656561409856,
 ---
->             "attributes._ML_DAIKON_data_ID": 140621279056480,
+>             "attributes._TRAINCHECK_data_ID": 140621279056480,
 117c117
 <             "time": 123297988837864,
 ---
@@ -308,9 +308,9 @@ diff --color -r checker_output/trace_x-jxmnop-ddp-out-of-sync/failed.log referen
 ---
 >             "meta_vars._DATA_PARALLEL_RANK": "0",
 129c129
-<             "attributes._ML_DAIKON_data_ID": 140621279058160,
+<             "attributes._TRAINCHECK_data_ID": 140621279058160,
 ---
->             "attributes._ML_DAIKON_data_ID": 140656561411776,
+>             "attributes._TRAINCHECK_data_ID": 140656561411776,
 159c159
 <             "time": 123299970638648,
 ---

diff --git a/docs/assets/code/mnist.py b/docs/assets/code/mnist.py
@@ -8,9 +8,9 @@
 from torchvision import datasets, transforms
 
 from traincheck import annotate_stage
-from traincheck.instrumentor import meta_vars
+from traincheck.instrumentor import META_VARS
 
-meta_vars["step"] = -1
+META_VARS["step"] = -1
 
 
 class Net(nn.Module):
@@ -40,10 +40,10 @@ def forward(self, x):
 
 
 def train(args, model, device, train_loader, optimizer, epoch):
-    annotate_stage("training")  # ML_DAIKON: stage annotation
+    annotate_stage("training")  # TRAINCHECK: stage annotation
     model.train()
     for batch_idx, (data, target) in enumerate(train_loader):
-        meta_vars["step"] += 1
+        META_VARS["step"] += 1
         data, target = data.to(device), target.to(device)
         optimizer.zero_grad()
         output = model(data)
@@ -63,13 +63,13 @@ def train(args, model, device, train_loader, optimizer, epoch):
             if args.dry_run:
                 break
 
-        # ML_DAIKON: break after 100 batches
+        # TRAINCHECK: break after 51 batches (batch_idx is 0-based)
         if batch_idx == 50:
             break
 
 
 def test(model, device, test_loader):
-    annotate_stage("testing")  # ML_DAIKON: stage annotation
+    annotate_stage("testing")  # TRAINCHECK: stage annotation
     model.eval()
     test_loss = 0
     correct = 0
@@ -87,7 +87,7 @@ def test(model, device, test_loader):
             correct += pred.eq(target.view_as(pred)).sum().item()
 
             data_idx += 1
-            # ML_DAIKON: break after 10 batches
+            # TRAINCHECK: break after 10 batches
             if data_idx == 10:
                 break
 
@@ -174,7 +174,7 @@ def main():
     )
     args = parser.parse_args()
 
-    annotate_stage("init")  # ML_DAIKON: stage annotation
+    annotate_stage("init")  # TRAINCHECK: stage annotation
     use_cuda = not args.no_cuda and torch.cuda.is_available()
     use_mps = not args.no_mps and torch.backends.mps.is_available()
 
@@ -191,7 +191,7 @@ def main():
     test_kwargs = {"batch_size": args.test_batch_size}
     if use_cuda:
         cuda_kwargs = {"num_workers": 2, "pin_memory": True, "shuffle": True}
-        # ML_DAIKON: set num_workers to 0 to avoid dataloader related invariants
+        # TRAINCHECK: use the line below (num_workers=0) to avoid dataloader-related invariants
         # cuda_kwargs = {'num_workers': 0, 'pin_memory': True, 'shuffle': True}
         train_kwargs.update(cuda_kwargs)
         test_kwargs.update(cuda_kwargs)
@@ -212,11 +212,11 @@ def main():
         train(args, model, device, train_loader, optimizer, epoch)
         test(model, device, test_loader)
 
-        annotate_stage("training")  # ML_DAIKON: stage annotation
+        annotate_stage("training")  # TRAINCHECK: stage annotation
         scheduler.step()
 
     if args.save_model:
-        annotate_stage("checkpointing")  # ML_DAIKON: stage annotation
+        annotate_stage("checkpointing")  # TRAINCHECK: stage annotation
         torch.save(model.state_dict(), "mnist_cnn.pt")
 
 

diff --git a/docs/assets/examples/traincheck-collect/mnist-config/mnist.py b/docs/assets/examples/traincheck-collect/mnist-config/mnist.py
@@ -8,9 +8,9 @@
 from torchvision import datasets, transforms
 
 from traincheck import annotate_stage
-from traincheck.instrumentor import meta_vars
+from traincheck.instrumentor import META_VARS
 
-meta_vars["step"] = -1
+META_VARS["step"] = -1
 
 
 class Net(nn.Module):
@@ -40,10 +40,10 @@ def forward(self, x):
 
 
 def train(args, model, device, train_loader, optimizer, epoch):
-    annotate_stage("training")  # ML_DAIKON: stage annotation
+    annotate_stage("training")  # TRAINCHECK: stage annotation
     model.train()
     for batch_idx, (data, target) in enumerate(train_loader):
-        meta_vars["step"] += 1
+        META_VARS["step"] += 1
         data, target = data.to(device), target.to(device)
         optimizer.zero_grad()
         output = model(data)
@@ -63,13 +63,13 @@ def train(args, model, device, train_loader, optimizer, epoch):
             if args.dry_run:
                 break
 
-        # ML_DAIKON: break after 100 batches
+        # TRAINCHECK: break after 51 batches (batch_idx is 0-based)
         if batch_idx == 50:
             break
 
 
 def test(model, device, test_loader):
-    annotate_stage("testing")  # ML_DAIKON: stage annotation
+    annotate_stage("testing")  # TRAINCHECK: stage annotation
     model.eval()
     test_loss = 0
     correct = 0
@@ -87,7 +87,7 @@ def test(model, device, test_loader):
             correct += pred.eq(target.view_as(pred)).sum().item()
 
             data_idx += 1
-            # ML_DAIKON: break after 10 batches
+            # TRAINCHECK: break after 10 batches
             if data_idx == 10:
                 break
 
@@ -174,7 +174,7 @@ def main():
     )
     args = parser.parse_args()
 
-    annotate_stage("init")  # ML_DAIKON: stage annotation
+    annotate_stage("init")  # TRAINCHECK: stage annotation
     use_cuda = not args.no_cuda and torch.cuda.is_available()
     use_mps = not args.no_mps and torch.backends.mps.is_available()
 
@@ -191,7 +191,7 @@ def main():
     test_kwargs = {"batch_size": args.test_batch_size}
     if use_cuda:
         cuda_kwargs = {"num_workers": 2, "pin_memory": True, "shuffle": True}
-        # ML_DAIKON: set num_workers to 0 to avoid dataloader related invariants
+        # TRAINCHECK: use the line below (num_workers=0) to avoid dataloader-related invariants
         # cuda_kwargs = {'num_workers': 0, 'pin_memory': True, 'shuffle': True}
         train_kwargs.update(cuda_kwargs)
         test_kwargs.update(cuda_kwargs)
@@ -212,11 +212,11 @@ def main():
         train(args, model, device, train_loader, optimizer, epoch)
         test(model, device, test_loader)
 
-        annotate_stage("training")  # ML_DAIKON: stage annotation
+        annotate_stage("training")  # TRAINCHECK: stage annotation
         scheduler.step()
 
     if args.save_model:
-        annotate_stage("checkpointing")  # ML_DAIKON: stage annotation
+        annotate_stage("checkpointing")  # TRAINCHECK: stage annotation
         torch.save(model.state_dict(), "mnist_cnn.pt")