Compile and call a MAX model from C++

I have a simple CNN implemented in MAX. Is there a way to build it and link to / call it from a C++ application? I've left the code below for reference.

Thanks!

from typing import Any
import numpy as np
import max.nn as nn
from max import driver
from max import engine
from max.dtype import DType
from max.graph import DeviceRef, Graph, TensorType, ops, Weight


def build_model() -> Graph:
    # 1. Build the graph
    batch_size = 1
    height = 512
    width = 612
    in_channels = 3
    out_channels = 8
    kernel_size = 3
    input_type = TensorType(
        dtype=DType.float32,
        shape=(batch_size, height, width, in_channels),
        device=DeviceRef.GPU(),
    )
    with Graph("cnn", input_types=(input_type,)) as graph:
        rgb = graph.inputs[0]
        # Create weights for convolution
        convs = []
        for i in range(5):
            # The final layer projects back down to 3 output channels.
            out_channels = 3 if i == 4 else 8
            convs.append(_build_conv2(in_channels, out_channels, kernel_size, i))
            in_channels = out_channels

        for conv in convs:
            rgb = conv(rgb)
            # Apply activation function
            rgb = ops.relu(rgb)

        graph.output(rgb)

    return graph


def _build_conv2(in_channels: int, out_channels: int, kernel_size: int, layer_idx: int) -> nn.Conv2DV1:
    filter_2d = Weight(
        name=f"layer{layer_idx}_weight",
        dtype=DType.float32,
        shape=[kernel_size, kernel_size, in_channels, out_channels],
        device=DeviceRef.GPU(),
    )
    bias_2d = Weight(
        name=f"layer{layer_idx}_bias",
        dtype=DType.float32,
        shape=[out_channels],
        device=DeviceRef.GPU(),
    )
    # Create the Conv2D layer; it is applied to the input later in build_model
    return nn.Conv2DV1(
        filter=filter_2d,
        bias=bias_2d,
        stride=1,
        padding=1,
    )


if __name__ == "__main__":
    # Weights registry: arrays keyed by the Weight names used in _build_conv2
    w = np.load("params.npz")
    wr = {k: w[k] for k in w.keys()}
    g = build_model()

    im = np.zeros((1, 512, 612, 3), dtype=np.float32)
    device = driver.Accelerator()
    im = driver.Tensor.from_numpy(im).to(device=device)

    # 2. Compile and load the model
    session = engine.InferenceSession(devices=[device])
    model = session.load(g, weights_registry=wr)
    for tensor in model.input_metadata:
        print(f"name: {tensor.name}, shape: {tensor.shape}, dtype: {tensor.dtype}")

    # 3. Execute the graph
    ret = model.execute(im)
    print("result:", ret)

Due to the deprecation of the Mojo and C MAX APIs, there isn’t really a good way to do this that doesn’t involve a round trip through Python.
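
To make that round trip concrete: the usual shape is a thin Python module that builds and loads the model once and exposes a single inference entry point, which a C++ host can then call either by embedding the interpreter (the CPython API, or pybind11's embedding support) or by talking to a separate Python process over IPC. A minimal sketch of the Python side, assuming the question's code is saved as cnn_model.py (the module and function names here are illustrative, not an official MAX interface):

# max_bridge.py -- hypothetical wrapper module for a C++ host to import.
# Nothing here is an official MAX C++ interface; it just packages the
# question's Python workflow behind one function.
import numpy as np
from max import driver, engine

from cnn_model import build_model  # the build_model from the question

_device = driver.Accelerator()
_session = engine.InferenceSession(devices=[_device])
_w = np.load("params.npz")
_model = _session.load(build_model(), weights_registry={k: _w[k] for k in _w.keys()})

def infer(image: np.ndarray) -> np.ndarray:
    # Expects an NHWC float32 array matching the graph's input type.
    tensor = driver.Tensor.from_numpy(image).to(device=_device)
    outputs = _model.execute(tensor)
    # Assumes the first output can be copied back to host memory with
    # to_numpy(); adjust for your driver version if needed.
    return outputs[0].to_numpy()

On the C++ side, the interaction then reduces to Py_Initialize, importing max_bridge, and invoking infer per frame, or to running the model in a separate Python process behind a socket. Either way the Python dependency remains, but it stays contained to one module.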

Thanks for the response.

So if I want to do this, I would have to write the whole model in pure Mojo, without using MAX?

Yes, although you would still have access to the MAX kernels, which would likely be quite helpful. You’re going to take a bit of a performance hit, though, since losing out on “MAX the graph compiler” means losing a number of optimizations that you would have to rewrite yourself.

Sadly, MAX doesn’t presently have a solution for environments where Python isn’t acceptable.