
Batch Inference & Model Export

Load best.pt → run batch predictions → save CSV → (optional) sort files by predicted label → export TorchScript and ONNX.

Batch inference and export overview in Jupyter
Overview: batch predictions from a checkpoint and exports saved under /workspace/exports/.

Overview

This page runs batch inference for an image classification model you trained earlier, then exports the model to TorchScript and ONNX. Results are written to CSV with top-K probabilities. All paths assume the standard CloudDock layout with a writable /workspace/.

Tip: You can monitor VRAM and CPU in the System panel in the Launcher while this notebook runs.

Folders

/workspace/
  runs/your_run_id/
    best.pt
    meta.json
  data/
    infer/                      # put images here (jpg/png/webp)
      *.jpg
  outputs/
    infer/
      predictions.csv
  exports/
    model.ts                    # TorchScript
    model.onnx                  # ONNX

Setup

%pip install torch torchvision pillow pandas tqdm --quiet

Load checkpoint

import torch, json
from pathlib import Path
from collections import OrderedDict
from torchvision import transforms


ROOT = Path("/workspace")
RUN  = ROOT/"runs/cls_exp1"          # change to your run folder under runs/
CKPT = RUN/"best.pt"                 # trained checkpoint
META = RUN/"meta.json"               # optional metadata with class names
OUTD = ROOT/"outputs/infer"; OUTD.mkdir(parents=True, exist_ok=True)


# 1) Class names: prefer meta.json when available
classes = []
if META.exists():
    try:
        meta = json.load(open(META))
        classes = meta.get("classes", []) or meta.get("class_names", [])
    except Exception as e:
        print("meta.json not usable:", e)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")



import torch.nn as nn, torch.nn.functional as F

# Architecture used at training time; replace with yours if it differs (see FAQ)
class SmallCNN(nn.Module):
    def __init__(self, n):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool  = nn.MaxPool2d(2, 2)
        self.head  = nn.Linear(128, n)
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = F.adaptive_avg_pool2d(x, (1, 1)).flatten(1)
        return self.head(x)


def strip_module_prefix(sd: dict):
    if any(k.startswith("module.") for k in sd.keys()):
        return OrderedDict((k.replace("module.", "", 1), v) for k,v in sd.items())
    return sd


def load_model_from(ckpt_path, num_classes_hint=None):
    # weights_only=False allows full-module checkpoints (required on torch>=2.6);
    # use it only with checkpoints you trust
    obj = torch.load(ckpt_path, map_location=device, weights_only=False)


    # a) Whole nn.Module was serialized: use it directly
    if isinstance(obj, nn.Module):
        m = obj.to(device).eval()
        print("[load] loaded full nn.Module; skip state_dict")
        return m


    # b) Dict checkpoint: recover class names if present, then locate the state_dict
    if isinstance(obj, dict):
        for key in ("classes", "class_names"):
            if key in obj and not classes:
                cls = obj[key]
                if isinstance(cls, (list, tuple)):
                    # update the module-level list so later cells see the names
                    globals()["classes"] = list(cls)


        cand = None
        if "model" in obj and isinstance(obj["model"], dict):
            cand = obj["model"]
            print("[load] using ckpt['model']")
        elif "state_dict" in obj and isinstance(obj["state_dict"], dict):
            cand = obj["state_dict"]
            print("[load] using ckpt['state_dict']")
        elif all(isinstance(v, torch.Tensor) for v in obj.values()):
            cand = obj
            print("[load] using raw state_dict (top-level)")


        if cand is None:
            raise ValueError(f"Unknown checkpoint dict format. Keys: {list(obj.keys())[:10]} ...")


        cand = strip_module_prefix(cand)
        n_cls = len(classes) if classes else (num_classes_hint or 2)
        m = SmallCNN(n_cls).to(device).eval()
        missing, unexpected = m.load_state_dict(cand, strict=False)
        if missing:    print("[load] missing keys:", missing)
        if unexpected: print("[load] unexpected keys:", unexpected)
        return m


    raise TypeError(f"Unsupported checkpoint object type: {type(obj)}")


model = load_model_from(CKPT, num_classes_hint=2)   # hint is only used when class names are unknown
print("Classes:", classes if classes else "(unknown — using indices)")


# Must match the preprocessing used at training time (add Normalize if you trained with it)
infer_tf = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor()])
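
A quick smoke test before running the full batch (a sketch assuming the 224×224 input used below; the output should be shaped [1, num_classes]):

with torch.no_grad():
    out = model(torch.zeros(1, 3, 224, 224, device=device))
print("logits shape:", tuple(out.shape))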

Path check

from pathlib import Path
INFD = Path("/workspace/data/infer")   # the folder described above; change if your images live elsewhere
print("INFD resolves to:", INFD.resolve())


all_files = [p for p in INFD.rglob("*") if p.is_file()]
print("total files under INFD:", len(all_files))
print("exts:", sorted({p.suffix.lower() for p in all_files})[:20])


images = [p for p in all_files if p.suffix.lower() in (".jpg", ".jpeg", ".png", ".webp")]
print("images matched:", len(images))
print("first 5:", [str(p.relative_to(INFD)) for p in images[:5]])

Batch inference (CSV with top-K)

import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm


TOPK = 3
rows = []


def load_image(p):
    im = Image.open(p).convert("RGB")
    return infer_tf(im).unsqueeze(0)


images = sorted([p for p in INFD.rglob("*") if p.suffix.lower() in (".jpg",".jpeg",".png",".webp")])


for p in tqdm(images):
    x = load_image(p).to(device)
    with torch.no_grad():
        logits = model(x)
        probs  = torch.softmax(logits, dim=1).squeeze(0).cpu().numpy()
    topk_idx = probs.argsort()[::-1][:TOPK]
    pred_idx = int(topk_idx[0])
    pred_lbl = classes[pred_idx] if classes else str(pred_idx)
    row = {
        "file": str(p.relative_to(INFD)),
        "pred_label": pred_lbl,
        "pred_index": pred_idx,
        "pred_prob": float(probs[pred_idx])
    }
    for i, k in enumerate(topk_idx, 1):
        row[f"top{i}_label"] = classes[k] if classes else str(k)
        row[f"top{i}_prob"]  = float(probs[k])
    rows.append(row)


df = pd.DataFrame(rows)
csv_path = OUTD/"predictions.csv"
df.to_csv(csv_path, index=False)
csv_path
CSV predictions preview in Jupyter
CSV preview of batch predictions (replace with your screenshot).
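
A quick sanity pass over the results (assumes the df from above). Heavily skewed label counts or uniformly low top-1 probabilities usually point to a preprocessing or class-order mismatch:

print(df["pred_label"].value_counts())
print("mean top-1 prob:", round(df["pred_prob"].mean(), 3))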

(Optional) sort files by predicted label
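
The CSV is enough for most workflows, but if you want the files grouped on disk, here is a minimal sketch. It assumes the df and INFD from the cells above, and copies rather than moves so data/infer/ stays intact:

import shutil

SORTED = OUTD/"sorted"
for _, r in df.iterrows():
    dest = SORTED/str(r["pred_label"])
    dest.mkdir(parents=True, exist_ok=True)
    shutil.copy2(INFD/r["file"], dest/Path(r["file"]).name)
print("sorted copies under:", SORTED)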

Export model — TorchScript

EXPD = ROOT/"exports"; EXPD.mkdir(parents=True, exist_ok=True)

example = torch.randn(1, 3, 224, 224, device=device)
ts = torch.jit.trace(model, example)
ts_path = EXPD/"model.ts"
ts.save(str(ts_path))
ts_path
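
Optionally verify that the traced module matches the eager model on the example input (a sketch reusing the example tensor above):

with torch.no_grad():
    diff = (model(example) - ts(example)).abs().max().item()
print("max abs diff vs eager:", diff)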

Export model — ONNX

onnx_path = EXPD/"model.onnx"
torch.onnx.export(
    model, example, str(onnx_path),
    input_names=["input"], output_names=["logits"],
    dynamic_axes={"input": {0: "batch"}, "logits": {0: "batch"}},
    opset_version=12, do_constant_folding=True
)
onnx_path

(Optional) validate ONNX on CPU

# %pip install onnxruntime --quiet
# import onnxruntime as ort, numpy as np
# sess = ort.InferenceSession(str(onnx_path), providers=["CPUExecutionProvider"])
# x_np = example.detach().cpu().numpy()
# logits = sess.run(["logits"], {"input": x_np})[0]
# logits.shape
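
To go one step further, compare the ONNX Runtime output against the eager model, in the same commented-out convention (assumes example and logits from the cells above):

# with torch.no_grad():
#     torch_logits = model(example).cpu().numpy()
# print("max abs diff:", abs(torch_logits - logits).max())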

FAQ

Where do I put my images?

Drop them under /workspace/data/infer/. Subfolders are OK; the script scans recursively.

My checkpoint uses a different model

Replace SmallCNN with your architecture and ensure the head matches len(classes).
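
As a hypothetical example, a minimal sketch using a torchvision ResNet-18 backbone (adjust to whatever you actually trained):

from torchvision.models import resnet18
import torch.nn as nn

def build_model(n_classes):
    m = resnet18(weights=None)                       # architecture only; weights come from best.pt
    m.fc = nn.Linear(m.fc.in_features, n_classes)    # head sized to match len(classes)
    return m

Swap this in where load_model_from constructs SmallCNN.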

Can I include filenames in results?

Yes — the CSV already includes a relative file column from data/infer/.

Ship it. Then measure it. Then ship it again.