Compile and call a MAX model from C++

I have a simple CNN implemented in MAX. Is there a way to build it and link to / call it from a C++ application? I've left the code below for reference.

Thanks!

from typing import Any
import numpy as np
import max.nn as nn
from max import driver
from max import engine
from max.dtype import DType
from max.graph import DeviceRef, Graph, TensorType, ops, Weight


def build_model() -> Graph:
    # 1. Build the graph
    batch_size = 1
    height = 512
    width = 612
    in_channels = 3
    out_channels = 8
    kernel_size = 3
    input_type = TensorType(
        dtype=DType.float32,
        shape=(batch_size, height, width, in_channels),
        device=DeviceRef.GPU(),
    )
    with Graph("cnn", input_types=(input_type,)) as graph:
        rgb = graph.inputs[0]
        # Create weights for convolution
        convs = []
        for i in range(5):
            # The final layer projects back down to 3 output channels.
            out_channels = 3 if i == 4 else 8
            convs.append(_build_conv2(in_channels, out_channels, kernel_size, i))
            in_channels = out_channels

        for conv in convs:
            rgb = conv(rgb)
            # Apply activation function
            rgb = ops.relu(rgb)

        graph.output(rgb)

    return graph


def _build_conv2(in_channels: int, out_channels: int, kernel_size: int, layer_idx: int) -> nn.Conv2DV1:
    filter_2d = Weight(
        name=f"layer{layer_idx}_weight",
        dtype=DType.float32,
        shape=[kernel_size, kernel_size, in_channels, out_channels],
        device=DeviceRef.GPU(),
    )
    bias_2d = Weight(
        name=f"layer{layer_idx}_bias",
        dtype=DType.float32,
        shape=[out_channels],
        device=DeviceRef.GPU(),
    )
    # Create the Conv2D layer; it is applied to the input later in build_model
    return nn.Conv2DV1(
        filter=filter_2d,
        bias=bias_2d,
        stride=1,
        padding=1,
    )


if __name__ == "__main__":
    # Weights registry: arrays keyed by the Weight names used in _build_conv2
    w = np.load("params.npz")
    wr = {k: w[k] for k in w.keys()}
    g = build_model()

    im = np.zeros((1, 512, 612, 3), dtype=np.float32)
    device = driver.Accelerator()
    im = driver.Tensor.from_numpy(im).to(device=device)

    # 2. Compile and load the model
    session = engine.InferenceSession(devices=[device])
    model = session.load(g, weights_registry=wr)
    for tensor in model.input_metadata:
        print(f"name: {tensor.name}, shape: {tensor.shape}, dtype: {tensor.dtype}")

    # 3. Execute the graph
    ret = model.execute(im)
    print("result:", ret)

Due to the deprecation of the Mojo and C MAX APIs, there isn’t really a good way to do this that doesn’t involve a round trip through Python.
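
To make that round trip concrete: the usual shape is a thin Python module that builds and loads the model once and exposes a single inference entry point, which a C++ host can then call either by embedding the interpreter (the CPython API, or pybind11's embedding support) or by talking to a separate Python process over IPC. A minimal sketch of the Python side, assuming the question's code is saved as cnn_model.py (the module and function names here are illustrative, not an official MAX interface):

# max_bridge.py -- hypothetical wrapper module for a C++ host to import.
# Nothing here is an official MAX C++ interface; it just packages the
# question's Python workflow behind one function.
import numpy as np
from max import driver, engine

from cnn_model import build_model  # the build_model from the question

_device = driver.Accelerator()
_session = engine.InferenceSession(devices=[_device])
_w = np.load("params.npz")
_model = _session.load(build_model(), weights_registry={k: _w[k] for k in _w.keys()})

def infer(image: np.ndarray) -> np.ndarray:
    # Expects an NHWC float32 array matching the graph's input type.
    tensor = driver.Tensor.from_numpy(image).to(device=_device)
    outputs = _model.execute(tensor)
    # Assumes the first output can be copied back to host memory with
    # to_numpy(); adjust for your driver version if needed.
    return outputs[0].to_numpy()

On the C++ side, the interaction then reduces to Py_Initialize, importing max_bridge, and invoking infer per frame, or to running the model in a separate Python process behind a socket. Either way the Python dependency remains, but it stays contained to one module.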

Thanks for the response.

So if I want to do this, I would have to write the whole model in pure Mojo, without using MAX?

Yes, although you would still have access to the MAX kernels, which would likely be quite helpful. You’re going to take a bit of a performance hit, though, since losing out on “MAX the graph compiler” means losing a number of optimizations that you would have to rewrite yourself.

Sadly, MAX doesn’t presently have a solution for environments where Python isn’t acceptable.