
Batch Inference & Model Export

Load best.pt → run batch predictions → save CSV → (optional) sort files by predicted label → export TorchScript and ONNX.

Batch inference and export overview in Jupyter
Overview: batch predictions from a checkpoint and exports saved under /workspace/exports/.

Overview

This page runs batch inference for an image classification model you trained earlier, then exports the model to TorchScript and ONNX. Results are written to CSV with top-K probabilities. All paths assume the standard CloudDock layout with a writable /workspace/.

Tip: You can monitor VRAM and CPU in the System panel in the Launcher while this notebook runs.

Folders

/workspace/
  runs/your_run_id/
    best.pt
    meta.json
  data/
    infer/                      # put images here (jpg/png/webp)
      *.jpg
  outputs/
    infer/
      predictions.csv
  exports/
    model.ts                    # TorchScript
    model.onnx                  # ONNX

Setup

%pip install torch torchvision pillow pandas tqdm --quiet

Load checkpoint

import torch, json
from pathlib import Path
from collections import OrderedDict
from torchvision import transforms


ROOT = Path("/workspace")
RUN  = ROOT/"runs/cls_exp1"          # change to your run folder under runs/
CKPT = RUN/"best.pt"                 # trained checkpoint
META = RUN/"meta.json"               # optional metadata with class names
OUTD = ROOT/"outputs/infer"; OUTD.mkdir(parents=True, exist_ok=True)


# 1) Class names: prefer meta.json when available
classes = []
if META.exists():
    try:
        meta = json.load(open(META))
        classes = meta.get("classes", []) or meta.get("class_names", [])
    except Exception as e:
        print("meta.json not usable:", e)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")



import torch.nn as nn, torch.nn.functional as F

# Architecture used at training time; replace with yours if it differs (see FAQ)
class SmallCNN(nn.Module):
    def __init__(self, n):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.pool  = nn.MaxPool2d(2, 2)
        self.head  = nn.Linear(128, n)
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = F.adaptive_avg_pool2d(x, (1, 1)).flatten(1)
        return self.head(x)


def strip_module_prefix(sd: dict):
    if any(k.startswith("module.") for k in sd.keys()):
        return OrderedDict((k.replace("module.", "", 1), v) for k,v in sd.items())
    return sd


def load_model_from(ckpt_path, num_classes_hint=None):
    # weights_only=False allows full-module checkpoints (required on torch>=2.6);
    # use it only with checkpoints you trust
    obj = torch.load(ckpt_path, map_location=device, weights_only=False)


    # a) Whole nn.Module was serialized: use it directly
    if isinstance(obj, nn.Module):
        m = obj.to(device).eval()
        print("[load] loaded full nn.Module; skip state_dict")
        return m


    # b) Dict checkpoint: recover class names if present, then locate the state_dict
    if isinstance(obj, dict):
        for key in ("classes", "class_names"):
            if key in obj and not classes:
                cls = obj[key]
                if isinstance(cls, (list, tuple)):
                    # update the module-level list so later cells see the names
                    globals()["classes"] = list(cls)


        cand = None
        if "model" in obj and isinstance(obj["model"], dict):
            cand = obj["model"]
            print("[load] using ckpt['model']")
        elif "state_dict" in obj and isinstance(obj["state_dict"], dict):
            cand = obj["state_dict"]
            print("[load] using ckpt['state_dict']")
        elif all(isinstance(v, torch.Tensor) for v in obj.values()):
            cand = obj
            print("[load] using raw state_dict (top-level)")


        if cand is None:
            raise ValueError(f"Unknown checkpoint dict format. Keys: {list(obj.keys())[:10]} ...")


        cand = strip_module_prefix(cand)
        n_cls = len(classes) if classes else (num_classes_hint or 2)
        m = SmallCNN(n_cls).to(device).eval()
        missing, unexpected = m.load_state_dict(cand, strict=False)
        if missing:    print("[load] missing keys:", missing)
        if unexpected: print("[load] unexpected keys:", unexpected)
        return m


    raise TypeError(f"Unsupported checkpoint object type: {type(obj)}")


model = load_model_from(CKPT, num_classes_hint=2)   # hint is only used when class names are unknown
print("Classes:", classes if classes else "(unknown — using indices)")


# Must match the preprocessing used at training time (add Normalize if you trained with it)
infer_tf = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor()])
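
A quick smoke test before running the full batch (a sketch assuming the 224×224 input used below; the output should be shaped [1, num_classes]):

with torch.no_grad():
    out = model(torch.zeros(1, 3, 224, 224, device=device))
print("logits shape:", tuple(out.shape))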

Path check

from pathlib import Path
INFD = Path("/workspace/data/infer")   # the folder described above; change if your images live elsewhere
print("INFD resolves to:", INFD.resolve())


all_files = [p for p in INFD.rglob("*") if p.is_file()]
print("total files under INFD:", len(all_files))
print("exts:", sorted({p.suffix.lower() for p in all_files})[:20])


images = [p for p in all_files if p.suffix.lower() in (".jpg", ".jpeg", ".png", ".webp")]
print("images matched:", len(images))
print("first 5:", [str(p.relative_to(INFD)) for p in images[:5]])

Batch inference (CSV with top-K)

import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm


TOPK = 3
rows = []


def load_image(p):
    im = Image.open(p).convert("RGB")
    return infer_tf(im).unsqueeze(0)


images = sorted([p for p in INFD.rglob("*") if p.suffix.lower() in (".jpg",".jpeg",".png",".webp")])


for p in tqdm(images):
    x = load_image(p).to(device)
    with torch.no_grad():
        logits = model(x)
        probs  = torch.softmax(logits, dim=1).squeeze(0).cpu().numpy()
    topk_idx = probs.argsort()[::-1][:TOPK]
    pred_idx = int(topk_idx[0])
    pred_lbl = classes[pred_idx] if classes else str(pred_idx)
    row = {
        "file": str(p.relative_to(INFD)),
        "pred_label": pred_lbl,
        "pred_index": pred_idx,
        "pred_prob": float(probs[pred_idx])
    }
    for i, k in enumerate(topk_idx, 1):
        row[f"top{i}_label"] = classes[k] if classes else str(k)
        row[f"top{i}_prob"]  = float(probs[k])
    rows.append(row)


df = pd.DataFrame(rows)
csv_path = OUTD/"predictions.csv"
df.to_csv(csv_path, index=False)
csv_path
CSV predictions preview in Jupyter
CSV preview of batch predictions (replace with your screenshot).
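
A quick sanity pass over the results (assumes the df from above). Heavily skewed label counts or uniformly low top-1 probabilities usually point to a preprocessing or class-order mismatch:

print(df["pred_label"].value_counts())
print("mean top-1 prob:", round(df["pred_prob"].mean(), 3))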

(Optional) sort files by predicted label
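
The CSV is enough for most workflows, but if you want the files grouped on disk, here is a minimal sketch. It assumes the df and INFD from the cells above, and copies rather than moves so data/infer/ stays intact:

import shutil

SORTED = OUTD/"sorted"
for _, r in df.iterrows():
    dest = SORTED/str(r["pred_label"])
    dest.mkdir(parents=True, exist_ok=True)
    shutil.copy2(INFD/r["file"], dest/Path(r["file"]).name)
print("sorted copies under:", SORTED)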

Export model — TorchScript

EXPD = ROOT/"exports"; EXPD.mkdir(parents=True, exist_ok=True)

example = torch.randn(1, 3, 224, 224, device=device)
ts = torch.jit.trace(model, example)
ts_path = EXPD/"model.ts"
ts.save(str(ts_path))
ts_path
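
Optionally verify that the traced module matches the eager model on the example input (a sketch reusing the example tensor above):

with torch.no_grad():
    diff = (model(example) - ts(example)).abs().max().item()
print("max abs diff vs eager:", diff)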

Export model — ONNX

onnx_path = EXPD/"model.onnx"
torch.onnx.export(
    model, example, str(onnx_path),
    input_names=["input"], output_names=["logits"],
    dynamic_axes={"input": {0: "batch"}, "logits": {0: "batch"}},
    opset_version=12, do_constant_folding=True
)
onnx_path

(Optional) validate ONNX on CPU

# %pip install onnxruntime --quiet
# import onnxruntime as ort, numpy as np
# sess = ort.InferenceSession(str(onnx_path), providers=["CPUExecutionProvider"])
# x_np = example.detach().cpu().numpy()
# logits = sess.run(["logits"], {"input": x_np})[0]
# logits.shape
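
To go one step further, compare the ONNX Runtime output against the eager model, in the same commented-out convention (assumes example and logits from the cells above):

# with torch.no_grad():
#     torch_logits = model(example).cpu().numpy()
# print("max abs diff:", abs(torch_logits - logits).max())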

FAQ

Where do I put my images?

Drop them under /workspace/data/infer/. Subfolders are OK; the script scans recursively.

My checkpoint uses a different model

Replace SmallCNN with your architecture and ensure the head matches len(classes).
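
As a hypothetical example, a minimal sketch using a torchvision ResNet-18 backbone (adjust to whatever you actually trained):

from torchvision.models import resnet18
import torch.nn as nn

def build_model(n_classes):
    m = resnet18(weights=None)                       # architecture only; weights come from best.pt
    m.fc = nn.Linear(m.fc.in_features, n_classes)    # head sized to match len(classes)
    return m

Swap this in where load_model_from constructs SmallCNN.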

Can I include filenames in results?

Yes — the CSV already includes a relative file column from data/infer/.

Ship it. Then measure it. Then ship it again.