Octomizer Python SDK

Octomizer provides a Python SDK that offers an easy-to-use wrapper around the Octomizer RPC Interface. The SDK's API is documented in the Octomizer Python API reference.
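
All of the examples below follow the same pattern: create a client, upload a model, benchmark it, Octomize it, and download the resulting package. Creating the client is a one-liner; a minimal sketch (per the run instructions in the examples, the client picks up its credentials from the OCTOMIZER_API_TOKEN environment variable):

import octomizer.client

# The client authenticates using the OCTOMIZER_API_TOKEN environment
# variable, so no arguments are needed here.
client = octomizer.client.OctomizerClient()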

Below is a simple example of using the Python SDK to benchmark and optimize an ONNX model with static input shapes.

#!/usr/bin/env python3

"""
This is a simple example of using the Octomizer API. It uploads an ONNX version of
the MNIST model, benchmarks it on ONNX-RT, Octomizes it, and returns both the Octomized
model benchmark results and a Python package for the optimized model.

After installing the Octomizer SDK, run as:
  % export OCTOMIZER_API_TOKEN=<your API token>
  % ./octomizer_static_example.py

Windows users should set their API token as an environment variable by other means (for example, with the `set` command), since `export` is not supported on Windows.

Typical output will look like this:

Uploading model tests/testdata/mnist.onnx...
Benchmarking using ONNX-RT...
ONNX-RT benchmark metrics:
runtime_ms_mean: 0.1281123161315918
runtime_ms_std: 0.3681384027004242

Waiting for Octomization to complete...
Octomized TVM benchmark metrics:
runtime_ms_mean: 0.07081685215234756
runtime_ms_std: 0.019525768235325813
compile_ms: 1084.7218017578125
full_metrics_dataref_uuid: "7df384b3-1d3d-4ef0-ae25-e57588090876"

Saved packaged model to ./mnist_octomized-0.1.0-py3-none-any.whl
"""

from __future__ import annotations

import octomizer.client
import octomizer.models.onnx_model as onnx_model
from octomizer.model_variant import AutoschedulerOptions


def main():
    # Specify the model file.
    onnx_model_file = "tests/testdata/mnist.onnx"

    # Specify the Python package name for the resulting model.
    model_package_name = "mnist_octomized"

    # Specify the platform to target.
    platform = "broadwell"

    # Create the Octomizer Client instance.
    client = octomizer.client.OctomizerClient()

    # Upload the ONNX model file.
    print(f"Uploading model {onnx_model_file}...")
    model = onnx_model.ONNXModel(
        client,
        name=model_package_name,
        model=onnx_model_file,
        description="Created by octomizer_static_example.py",
    )
    model_variant = model.get_uploaded_model_variant()

    # Benchmark the model and get results. MNIST is a small and simple model,
    # so this should return quickly.
    benchmark_workflow = model_variant.benchmark(platform)
    print("Benchmarking using ONNX-RT...")
    benchmark_workflow.wait()
    if not benchmark_workflow.completed():
        raise RuntimeError(
            f"Workflow did not complete, status is {benchmark_workflow.status()}"
        )
    metrics = benchmark_workflow.metrics()
    print(f"ONNX-RT benchmark metrics:\n{metrics}")

    # Octomize the model. Since this is a small model and this is only an example,
    # we set the values in `tuning_options` ridiculously low so it does not take too long to
    # complete. Normally these values would be much higher (the defaults are 1000 and 250,
    # respectively).
    #
    # If you want to generate a specific type of package (the default is a Python package),
    # you can pass package_type=octomizer.model_variant.PackageType.PYTHON_PACKAGE or
    # LINUX_SHARED_OBJECT. See the Python API documentation for the full list of tuning
    # and packaging options.
    octomize_workflow = model_variant.octomize(
        platform,
        tuning_options=AutoschedulerOptions(
            trials_per_kernel=3, early_stopping_threshold=1
        ),
    )
    print("Waiting for Octomization to complete...")
    octomize_workflow.wait()
    if not octomize_workflow.completed():
        raise RuntimeError(
            f"Workflow did not complete, status is {octomize_workflow.status()}"
        )
    metrics = octomize_workflow.metrics()
    # Don't be surprised if the TVM numbers are slower than ONNX-RT -- we didn't
    # run enough rounds of autotuning to ensure great performance!
    print(f"Octomized TVM benchmark metrics:\n{metrics}")

    # Download the package for the optimized model.
    output_filename = octomize_workflow.save_package(out_dir=".")
    print(f"Saved packaged model to {output_filename}")


if __name__ == "__main__":
    main()
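
The comments in the example above mention the `package_type` argument to `octomize()`. Below is a minimal sketch of requesting a Linux shared object instead of the default Python wheel; it reuses the `model_variant` and `platform` variables from the example, and the low tuning values are again only for demonstration:

from octomizer.model_variant import AutoschedulerOptions, PackageType

# Request a Linux shared object instead of the default PYTHON_PACKAGE.
octomize_workflow = model_variant.octomize(
    platform,
    package_type=PackageType.LINUX_SHARED_OBJECT,
    tuning_options=AutoschedulerOptions(
        trials_per_kernel=3, early_stopping_threshold=1
    ),
)
output_filename = octomize_workflow.save_package(out_dir=".")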

Below is a simple example of using the Python SDK to benchmark and optimize an ONNX model with dynamic input shapes.

#!/usr/bin/env python3

"""
This is a simple example of using the Octomizer API. It uploads an ONNX version of
the MobileNet model, benchmarks it on ONNX-RT, Octomizes it, and returns both the Octomized
model benchmark results and a Python package for the optimized model.

After installing the Octomizer SDK, run as:
  % export OCTOMIZER_API_TOKEN=<your API token>
  % ./octomizer_dynamic_example.py

Windows users should set their API token as an environment variable by other means (for example, with the `set` command), since `export` is not supported on Windows.

Typical output will look like this:

Uploading model tests/testdata/mobilenet.onnx...
Benchmarking using ONNX-RT...
ONNX-RT benchmark metrics:
runtime_ms_mean: 0.1281123161315918
runtime_ms_std: 0.3681384027004242

Waiting for Octomization to complete...
Octomized TVM benchmark metrics:
runtime_ms_mean: 0.07081685215234756
runtime_ms_std: 0.019525768235325813
compile_ms: 1084.7218017578125
full_metrics_dataref_uuid: "7df384b3-1d3d-4ef0-ae25-e57588090876"

Saved packaged model to ./mobilenet_octomized-0.1.0-py3-none-any.whl
"""

from __future__ import annotations

import octomizer.client
import octomizer.models.onnx_model as onnx_model
from octomizer.model_variant import AutoschedulerOptions


def main():
    # Specify the model file and input layer parameters.
    onnx_model_file = "tests/testdata/mobilenet.onnx"

    # Specify the Python package name for the resulting model.
    model_package_name = "mobilenet_octomized"

    # Specify the platform to target.
    platform = "broadwell"

    # Create the Octomizer Client instance.
    client = octomizer.client.OctomizerClient()

    # Upload the ONNX model file.
    print(f"Uploading model {onnx_model_file}...")
    model = onnx_model.ONNXModel(
        client,
        name=model_package_name,
        model=onnx_model_file,
        description="Created by octomizer_dynamic_example.py",
    )
    model_variant = model.get_uploaded_model_variant()

    # The inputs cannot be fully inferred: the batch dimension is dynamic (-1).
    inferred_input_shapes, inferred_input_dtypes = model_variant.inputs
    assert inferred_input_shapes == {"input:0": [-1, 3, 224, 224]}
    assert inferred_input_dtypes == {"input:0": "float32"}

    # Disambiguate the inputs: the dynamic -1 batch dimension has been replaced by a concrete 1.
    input_shapes = {"input:0": [1, 3, 224, 224]}
    input_dtypes = {"input:0": "float32"}

    # Benchmark the model and get results. MobileNet is a small and simple model,
    # so this should return quickly.
    benchmark_workflow = model_variant.benchmark(
        platform=platform,
        input_shapes=input_shapes,
        input_dtypes=input_dtypes,
    )
    print("Benchmarking using ONNX-RT...")
    benchmark_workflow.wait()
    if not benchmark_workflow.completed():
        raise RuntimeError(
            f"Workflow did not complete, status is {benchmark_workflow.status()}"
        )
    metrics = benchmark_workflow.metrics()
    print(f"ONNX-RT benchmark metrics:\n{metrics}")

    # Octomize the model. Since this is a small model and this is only an example,
    # we set the values in `tuning_options` ridiculously low so it does not take too long to
    # complete. Normally these values would be much higher (the defaults are 1000 and 250,
    # respectively).
    #
    # If you want to generate a specific type of package (the default is a Python package),
    # you can pass package_type=octomizer.model_variant.PackageType.PYTHON_PACKAGE or
    # LINUX_SHARED_OBJECT. See the Python API documentation for the full list of tuning
    # and packaging options.
    octomize_workflow = model_variant.octomize(
        platform,
        tuning_options=AutoschedulerOptions(
            trials_per_kernel=3, early_stopping_threshold=1
        ),
        input_shapes=input_shapes,
        input_dtypes=input_dtypes,
    )
    print("Waiting for Octomization to complete...")
    octomize_workflow.wait()
    if not octomize_workflow.completed():
        raise RuntimeError(
            f"Workflow did not complete, status is {octomize_workflow.status()}"
        )
    metrics = octomize_workflow.metrics()
    # Don't be surprised if the TVM numbers are slower than ONNX-RT -- we didn't
    # run enough rounds of autotuning to ensure great performance!
    print(f"Octomized TVM benchmark metrics:\n{metrics}")

    # Download the package for the optimized model.
    output_filename = octomize_workflow.save_package(out_dir=".")
    print(f"Saved packaged model to {output_filename}")


if __name__ == "__main__":
    main()
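
The shape disambiguation above is hard-coded for MobileNet's single input. A small helper that generalizes it is sketched below; it replaces every dynamic (-1) dimension reported by `model_variant.inputs` with a concrete batch size. The helper name is ours, not part of the SDK:

def concretize_shapes(model_variant, batch_size=1):
    """Replace dynamic (-1) dimensions with a fixed batch size."""
    inferred_shapes, inferred_dtypes = model_variant.inputs
    input_shapes = {
        name: [batch_size if dim == -1 else dim for dim in shape]
        for name, shape in inferred_shapes.items()
    }
    return input_shapes, inferred_dtypes

# Usage, in place of the hard-coded dictionaries above:
# input_shapes, input_dtypes = concretize_shapes(model_variant)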

Below is a simple example of using the Python SDK to benchmark and optimize a TFLite model.

#!/usr/bin/env python3

"""
This is a simple example of using the Octomizer API. It uploads a TFLite model,
benchmarks it using the TFLite runtime, Octomizes it, and returns both the Octomized
model benchmark results and a Python package for the optimized model.

After installing the Octomizer SDK, run as:
  % export OCTOMIZER_API_TOKEN=<your API token>
  % ./octomizer_example_tflite.py

Windows users should set their API token as an environment variable by other
means (for example, with the `set` command), since `export` is not supported
on Windows.

Typical output will look like this:

Uploading model tests/testdata/mnist.tflite...
Benchmarking using the TFLite runtime...
TFLite runtime benchmark metrics:
runtime_ms_mean: 0.017525000497698784
runtime_ms_std: 0.0012540124589577317

Waiting for Octomization to complete...
Octomized TVM benchmark metrics:
runtime_ms_mean: 0.024132199585437775
runtime_ms_std: 0.0009970668470486999

Saved packaged model to ./mnist_tflite_octomized-0.1.0-py3-none-any.whl
"""

from __future__ import annotations

import octomizer.client
import octomizer.models.tflite_model as tflite_model
from octomizer.model_variant import AutoschedulerOptions


def main():
    # Specify the model file.
    tflite_model_file = "tests/testdata/mnist.tflite"

    # Specify the Python package name for the resulting model.
    model_package_name = "mnist_tflite_octomized"

    # Specify the platform to target.
    platform = "rasp4b"

    # Create the Octomizer Client instance.
    client = octomizer.client.OctomizerClient()

    # Upload the TFLite model file.
    print(f"Uploading model {tflite_model_file}...")
    model = tflite_model.TFLiteModel(
        client,
        name=model_package_name,
        model=tflite_model_file,
        description="Created by octomizer_example_tflite.py",
    )
    model_variant = model.get_uploaded_model_variant()

    # Benchmark the model and get results. MNIST is a small and simple model,
    # so this should return quickly.
    benchmark_workflow = model_variant.benchmark(platform)
    print("Benchmarking using TVM")
    benchmark_workflow.wait(timeout=1200)
    if not benchmark_workflow.completed():
        raise RuntimeError(
            f"Workflow did not complete, status is {benchmark_workflow.status()}"
        )
    metrics = benchmark_workflow.metrics()
    print(f"TFLite runtime benchmark metrics:\n{metrics}")

    # Octomize the model. Since this is a small model and this is only an example,
    # we set the values in `tuning_options` ridiculously low so it does not take too long to
    # complete. Normally these values would be much higher (the defaults are 1000 and 250,
    # respectively).
    #
    # If you want to generate a specific type of package (the default is a Python package),
    # you can pass package_type=octomizer.model_variant.PackageType.PYTHON_PACKAGE or
    # LINUX_SHARED_OBJECT. See the Python API documentation for the full list of tuning
    # and packaging options.
    octomize_workflow = model_variant.octomize(
        platform,
        tuning_options=AutoschedulerOptions(
            trials_per_kernel=3, early_stopping_threshold=1
        ),
    )
    print("Waiting for Octomization to complete...")
    octomize_workflow.wait(timeout=1200)
    if not octomize_workflow.completed():
        raise RuntimeError(
            f"Workflow did not complete, status is {octomize_workflow.status()}"
        )
    metrics = octomize_workflow.metrics()
    # Don't be surprised if the TVM numbers are slower than the TFLite runtime -- we didn't
    # run enough rounds of autotuning to ensure great performance!
    print(f"Octomized TVM benchmark metrics:\n{metrics}")

    # Download the package for the optimized model.
    output_filename = octomize_workflow.save_package(out_dir=".")
    print(f"Saved packaged model to {output_filename}")


if __name__ == "__main__":
    main()
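
Each example above targets a single platform ("broadwell" for the ONNX models, "rasp4b" for TFLite). The same workflow calls can be looped over several targets; a hedged sketch, assuming a `model_variant` obtained as in any of the examples:

# Benchmark the same model variant on each hardware target in turn.
for platform in ("broadwell", "rasp4b"):
    workflow = model_variant.benchmark(platform)
    workflow.wait(timeout=1200)
    if not workflow.completed():
        raise RuntimeError(
            f"Workflow did not complete, status is {workflow.status()}"
        )
    print(f"{platform} benchmark metrics:\n{workflow.metrics()}")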