Octomizer RPC Interface

The Octomizer service provides an RPC interface via gRPC and REST. The Octomizer SDK is a Python wrapper around the gRPC interface with some convenience functions. All capabilities of the Octomizer can be exercised via the gRPC or REST interface, while the Python SDK and Web UI provide more limited (but easier to use) functionality. This guide demonstrates how to use the underlying RPC interface directly.

gRPC usage

The SDK includes autogenerated Python wrappers for each of the protobuf messages below, in the octoml.octomizer.v1 package. For example, you can instantiate a Model protobuf message as follows:

# Import the autogenerated models_pb2 module and instantiate an empty Model
# protobuf message.
import octoml.octomizer.v1.models_pb2 as models_pb2
my_model = models_pb2.Model()

Here is an example of invoking the gRPC interface from Python:

import grpc
from octomizer.auth import AuthInterceptor
from octoml.octomizer.v1 import octomizer_service_pb2, octomizer_service_pb2_grpc

# Create the gRPC stub over a TLS channel to the Octomizer API endpoint.
# NOTE: `access_token` is not defined in this snippet; assign your access
# token to it before running.
credentials = grpc.ssl_channel_credentials()
channel = grpc.secure_channel("api.octoml.ai:443", credentials)
# Wrap the channel with an AuthInterceptor constructed from the access token.
channel = grpc.intercept_channel(channel, AuthInterceptor(access_token))
stub = octomizer_service_pb2_grpc.OctomizerServiceStub(channel)

# Example - list Models: build an empty ListModelsRequest, call the RPC, and
# print the response message.
request = octomizer_service_pb2.ListModelsRequest()
response = stub.ListModels(request)
print(f"Got back model list: {response}")

REST API usage

Each of the gRPC interfaces has a corresponding REST interface, using JSON-encoded message bodies in place of protobufs. The REST endpoint for each gRPC call is documented in the OctomizerService proto definition below. For example, to list models through the REST API with curl:

$ curl -H "Authorization: Bearer $OCTOMIZER_API_TOKEN" https://api.octoml.ai/v1/models

The above example assumes that the environment variable OCTOMIZER_API_TOKEN contains your access token.

Protobuf definitions

The protobuf definitions for the gRPC interface are detailed below.

OctomizerService

OctomizerService is the top-level gRPC interface to the Octomizer.

// Octomizer Service API.

syntax = "proto3";

package octoml.octomizer.v1;

option go_package = "octoml.ai/api_rest_proxy/gen/octoml/octomizer/v1";

import "octoml/octomizer/v1/tokens.proto";
import "octoml/octomizer/v1/hardware_targets.proto";
import "octoml/octomizer/v1/datarefs.proto";
import "octoml/octomizer/v1/models.proto";
import "octoml/octomizer/v1/users.proto";
import "octoml/octomizer/v1/workflows.proto";
import "octoml/octomizer/v1/ingest_model_status.proto";

import "google/api/annotations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";

// OctomizerService is the gRPC surface of the Octomizer API. Each RPC carries
// a google.api.http annotation that defines its REST binding.
service OctomizerService {
    // Creates an Access Token. The token can then be presented as an
    // authentication credential when calling the Octomizer API.
    rpc CreateAccessToken(CreateAccessTokenRequest) returns (CreateAccessTokenResponse) {
        option (google.api.http) = {
            post: "/v1/tokens"
            body: "token"
        };
    }

    // Deletes the given Access Token.
    rpc DeleteAccessToken(DeleteAccessTokenRequest) returns (google.protobuf.Empty) {
        option (google.api.http) = { delete: "/v1/tokens/{token_uuid}" };
    }

    // Returns the metadata of the given Access Token. The Access Token secret
    // is not included; it is only returned by the initial CreateAccessToken
    // call.
    rpc GetAccessToken(GetAccessTokenRequest) returns (AccessToken) {
        option (google.api.http) = { get: "/v1/tokens/{token_uuid}" };
    }

    // Lists Access Token metadata for this user. The Access Token secret is
    // not included; it is only returned by the initial CreateAccessToken call.
    rpc ListAccessTokens(ListAccessTokensRequest) returns (ListAccessTokensResponse) {
        option (google.api.http) = { get: "/v1/tokens" };
    }

    // Creates a new DataRef. This call only establishes the DataRef's metadata
    // in the system; the user must subsequently upload the data object itself
    // via the URL returned in the response.
    rpc CreateDataRef(CreateDataRefRequest) returns (CreateDataRefResponse) {
        option (google.api.http) = {
            post: "/v1/datarefs"
            body: "dataref"
        };
    }

    // Deletes the given DataRef. Objects which reference the DataRef are not
    // deleted, but both the DataRef's metadata and its contents will no longer
    // be accessible.
    rpc DeleteDataRef(DeleteDataRefRequest) returns (google.protobuf.Empty) {
        option (google.api.http) = { delete: "/v1/datarefs/{dataref_uuid}" };
    }

    // Returns the metadata of the given DataRef, including a short-term-use
    // URL which can be used to download the data object contents.
    rpc GetDataRef(GetDataRefRequest) returns (DataRef) {
        option (google.api.http) = { get: "/v1/datarefs/{dataref_uuid}" };
    }

    // Creates a new Model.
    rpc CreateModel(CreateModelRequest) returns (Model) {
        option (google.api.http) = {
            post: "/v1/models"
            body: "*"
        };
    }

    // Deletes the given Model. This implicitly deletes all ModelVariants,
    // Jobs, Workloads, and other objects in the system which reference this
    // Model.
    rpc DeleteModel(DeleteModelRequest) returns (google.protobuf.Empty) {
        option (google.api.http) = { delete: "/v1/models/{model_uuid}" };
    }

    // Returns information on a Model.
    rpc GetModel(GetModelRequest) returns (Model) {
        option (google.api.http) = { get: "/v1/models/{model_uuid}" };
    }

    // Lists the Models which the caller has access to view.
    rpc ListModels(ListModelsRequest) returns (ListModelsResponse) {
        option (google.api.http) = { get: "/v1/models" };
    }

    // Updates the given Model. Only a subset of Model fields may be modified
    // by the client; see UpdateModelRequest.update_mask for the currently
    // supported set.
    rpc UpdateModel(UpdateModelRequest) returns (Model) {
        option (google.api.http) = {
            patch: "/v1/models/{model.uuid}"
            body: "*"
        };
    }

    // Creates a new ModelVariant, which will typically reference a
    // previously-uploaded DataRef containing the model contents. Multiple
    // ModelVariants can reference the same DataRef(s) for the model contents;
    // this is safe because DataRefs are immutable.
    //
    // This API call is intended only for internal usage; external clients
    // should not invoke this endpoint.
    rpc CreateModelVariant(CreateModelVariantRequest) returns (ModelVariant) {
        option (google.api.http) = {
            post: "/v1/models/{model_variant.model_uuid}/variants"
            body: "model_variant"
        };
    }

    // Returns information on a ModelVariant.
    rpc GetModelVariant(GetModelVariantRequest) returns (ModelVariant) {
        option (google.api.http) = { get: "/v1/modelvariants/{model_variant_uuid}" };
    }

    // Lists the ModelVariants which the caller has access to view.
    rpc ListModelVariants(ListModelVariantsRequest) returns (ListModelVariantsResponse) {
        option (google.api.http) = { get: "/v1/models/{model_uuid}/variants" };
    }

    // Updates the given ModelVariant. Only certain fields of a ModelVariant
    // may be modified once it is created, such as the "description" and
    // "labels" fields. In general, fields which pertain to the contents,
    // configuration, or other parameters of a ModelVariant may not be
    // modified.
    rpc UpdateModelVariant(UpdateModelVariantRequest) returns (ModelVariant) {
        option (google.api.http) = {
            patch: "/v1/modelvariants/{model_variant.uuid}"
            body: "*"
        };
    }

    // Creates a new Workflow for the given Model.
    rpc CreateWorkflow(CreateWorkflowRequest) returns (Workflow) {
        option (google.api.http) = {
            post: "/v1/models/{workflow.model_uuid}/workflows"
            body: "workflow"
        };
    }

    // Returns information on a Workflow.
    rpc GetWorkflow(GetWorkflowRequest) returns (Workflow) {
        option (google.api.http) = { get: "/v1/workflows/{workflow_uuid}" };
    }

    // Lists the Workflows for the given Model.
    rpc ListWorkflows(ListWorkflowsRequest) returns (ListWorkflowsResponse) {
        option (google.api.http) = { get: "/v1/models/{model_uuid}/workflows" };
    }

    // Lists all Workflows matching the given filter.
    rpc ListAllWorkflows(ListAllWorkflowsRequest) returns (ListAllWorkflowsResponse) {
        option (google.api.http) = { get: "/v1/workflows" };
    }

    // Gets the usage for an account.
    rpc GetUsage(GetUsageRequest) returns (GetUsageResponse) {
        option (google.api.http) = { get: "/v1/workflows/usage" };
    }

    // Cancels the given Workflow without deleting it. Returns the Workflow and
    // its current status.
    rpc CancelWorkflow(CancelWorkflowRequest) returns (Workflow) {
        option (google.api.http) = {
            post: "/v1/workflows/{workflow_uuid}:cancel"
            body: "*"
        };
    }

    // Returns information on the given User. Fails if the client is not a
    // member of the Account of the requested User, or if the User does not
    // exist.
    rpc GetUser(GetUserRequest) returns (User) {
        option (google.api.http) = { get: "/v1/users/{user_uuid}" };
    }

    // Returns information on the currently logged-in User.
    rpc GetCurrentUser(google.protobuf.Empty) returns (User) {
        option (google.api.http) = { get: "/v1/users/current" };
    }

    // Returns information on the given Account. Fails if the account_uuid is
    // not the same as that of the client, or if the account does not exist.
    rpc GetAccount(GetAccountRequest) returns (Account) {
        option (google.api.http) = { get: "/v1/accounts/{account_uuid}" };
    }

    // Returns a paginated listing of the users in an account.
    rpc GetAccountUsers(GetAccountUsersRequest) returns (GetAccountUsersResponse) {
        option (google.api.http) = { get: "/v1/accounts/{account_uuid}/users" };
    }

    // Creates an Octomizer user. This method can only be called by account
    // administrators.
    rpc CreateUser(CreateUserRequest) returns (User) {
        option (google.api.http) = {
            post: "/v1/users"
            body: "*"
        };
    }

    // Updates an Octomizer user. This method can only be called by account
    // administrators.
    rpc UpdateUser(UpdateUserRequest) returns (User) {
        option (google.api.http) = {
            patch: "/v1/users/{user.uuid}"
            body: "*"
        };
    }

    // Deletes an Octomizer user. This method can only be called by account
    // administrators, and is only valid if the user is not the owner of any
    // models.
    rpc DeleteUser(DeleteUserRequest) returns (google.protobuf.Empty) {
        option (google.api.http) = { delete: "/v1/users/{user_uuid}" };
    }

    // Lists the HardwareTargets available under the user's account.
    rpc GetHardwareTargets(GetHardwareTargetsRequest) returns (GetHardwareTargetsResponse) {
        option (google.api.http) = { get: "/v1/hardware_targets/{account_uuid}" };
    }

    // Updates the account.
    rpc UpdateAccount(UpdateAccountRequest) returns (Account) {
        option (google.api.http) = {
            patch: "/v1/accounts/{account.uuid}"
            body: "*"
        };
    }

    // Creates a model ingest job and returns a reference to it.
    rpc CreateIngestModel(CreateIngestModelRequest) returns (IngestModelStatus) {
        option (google.api.http) = {
            post: "/v1/ingest_model"
            body: "*"
        };
    }

    // Returns the status of the given model ingest job.
    rpc GetIngestModelStatus(GetIngestModelStatusRequest) returns (IngestModelStatus) {
        option (google.api.http) = { get: "/v1/ingest_model/{uuid}" };
    }

    // Returns a list of the model ingest jobs.
    rpc ListIngestModelStatus(ListIngestModelStatusRequest) returns (ListIngestModelStatusResponse) {
        option (google.api.http) = { get: "/v1/ingest_model" };
    }
}

// Request message for OctomizerService.CreateAccessToken.
message CreateAccessTokenRequest {
    // Content of the AccessToken object to create. This field is the HTTP body
    // of the REST binding (POST /v1/tokens).
    AccessToken token = 1;
}

// Response message for OctomizerService.CreateAccessToken.
message CreateAccessTokenResponse {
    // Metadata for the AccessToken.
    AccessToken token = 1;

    // Secret for this AccessToken, which can be provided as an authentication credential
    // to subsequent API calls to act as the user that requested this token.
    // Per the GetAccessToken/ListAccessTokens documentation, the secret is only
    // returned by this initial CreateAccessToken call.
    string secret = 2;
}

// Request message for OctomizerService.DeleteAccessToken.
message DeleteAccessTokenRequest {
    // The UUID of the AccessToken object to delete. Bound to the
    // {token_uuid} segment of the REST path (DELETE /v1/tokens/{token_uuid}).
    string token_uuid = 1;
}

// Request message for OctomizerService.GetAccessToken.
message GetAccessTokenRequest {
    // The UUID of the AccessToken to return. Bound to the {token_uuid}
    // segment of the REST path (GET /v1/tokens/{token_uuid}).
    string token_uuid = 1;
}

// Request message for OctomizerService.ListAccessTokens.
message ListAccessTokensRequest {
    // The number of results to return in the response. If page size is 0, all
    // access tokens for this user are listed.
    uint32 page_size = 1;
    // The page token provided by a previous request to ListAccessTokens.
    // Leave empty to request the first page (presumably; confirm with the server).
    string page_token = 2;
}

// Response message for OctomizerService.ListAccessTokens.
message ListAccessTokensResponse {
    // The list of AccessTokens. The secret will not be populated.
    repeated AccessToken tokens = 1;
    // Token used to request the next page of results (pass it as
    // ListAccessTokensRequest.page_token).
    string next_page_token = 2;
}

// Request message for OctomizerService.CreateDataRef.
message CreateDataRefRequest {
    // Content of the DataRef object to create. This field is the HTTP body of
    // the REST binding (POST /v1/datarefs).
    DataRef dataref = 1;
}

// Response message for OctomizerService.CreateDataRef. Carries the new
// DataRef's metadata together with the URL to which its contents must be
// uploaded.
message CreateDataRefResponse {
    // Metadata for the (not yet populated) DataRef created by the corresponding request.
    DataRef dataref = 1;

    // URL where the client should upload the contents of the object to be referenced by
    // this DataRef. The URL must be used shortly after the CreateDataRefResponse
    // message has been received (see the URL's X-Goog-Expires param). This URL is only
    // accessible to the User that issued the original CreateDataRefRequest.
    //
    // Upload request:
    //   PUT <upload_url>
    //   content-type: application/octet-stream
    //   x-goog-content-sha256: <DataRef.sha256>
    //   body: <ModelVariant contents>
    //
    // Upload response:
    //   200 OK with an empty response body
    string upload_url = 2;
}

// Request message for OctomizerService.DeleteDataRef.
message DeleteDataRefRequest {
    // The UUID of the DataRef object to delete. Bound to the {dataref_uuid}
    // segment of the REST path (DELETE /v1/datarefs/{dataref_uuid}).
    string dataref_uuid = 1;
}

// Request message for OctomizerService.GetDataRef.
message GetDataRefRequest {
    // The UUID of the DataRef to return. Bound to the {dataref_uuid} segment
    // of the REST path (GET /v1/datarefs/{dataref_uuid}).
    string dataref_uuid = 1;
}

// Request message for OctomizerService.CreateModel. The whole message is the
// HTTP body of the REST binding (POST /v1/models, body "*").
message CreateModelRequest {
    // Metadata for the model to create.
    // Note that the client does not specify all fields in the request; see the
    // Model message for the fields marked "Not specified by clients".
    Model model = 1;
    // Format-specific configuration for the new Model.
    ModelFormatConfig model_format_config = 2;
}

// Request message for OctomizerService.DeleteModel.
message DeleteModelRequest {
    // The UUID of the Model object to delete. Bound to the {model_uuid}
    // segment of the REST path (DELETE /v1/models/{model_uuid}).
    string model_uuid = 1;
}

// Request message for OctomizerService.GetModel.
message GetModelRequest {
    // The UUID of the Model to return. Bound to the {model_uuid} segment of
    // the REST path (GET /v1/models/{model_uuid}).
    string model_uuid = 1;
}


// Request message for OctomizerService.ListModels.
message ListModelsRequest {
    // The number of results to return in the response. If page size is 0, all models visible
    // to the user are listed.
    uint32 page_size = 1;
    // The page token provided by a previous request to ListModels.
    string page_token = 2;

    // The field by which to order the results, e.g. "uuid".
    // Supported fields are uuid, owned_by, create_time and update_time.
    // The default sort order is ascending for uuid and owned_by, and descending for
    // create_time and update_time. To request a specific order, append the
    // direction to the field name, e.g. "uuid desc" or "create_time asc".
    string order_by = 3;
}

// Response message for OctomizerService.ListModels.
message ListModelsResponse {
    // The list of Models.
    repeated Model models = 1;
    // Token used to request the next page of results (pass it as
    // ListModelsRequest.page_token).
    string next_page_token = 2;
}

// Request message for OctomizerService.UpdateModel.
//
// TODO OCTO-1409: Once ImportModel flow is established, remove "inputs" as an update-able
// value below.
message UpdateModelRequest {
    // The Model resource to update. Its uuid identifies the target Model and is
    // bound to the REST path (PATCH /v1/models/{model.uuid}).
    Model model = 1;

    // The fields to update in the server's Model definition. Currently the supported updatable
    // fields are ["name", "description", "labels", "inputs"].
    google.protobuf.FieldMask update_mask = 2;

    // If true, repeated fields specified by the update_mask will overwrite values in the
    // target Model, rather than append.
    bool overwrite = 3;
}

// Request message for OctomizerService.CreateModelVariant.
message CreateModelVariantRequest {
    // Data for the ModelVariant to create. This field is the HTTP body of the
    // REST binding, and its model_uuid is bound to the REST path
    // (POST /v1/models/{model_variant.model_uuid}/variants).
    // Note that the client does not specify all fields in the request.
    // The request will return an error if the corresponding Model already has a ModelVariant
    // associated with it.
    ModelVariant model_variant = 1;
}

// Request message for OctomizerService.GetModelVariant.
message GetModelVariantRequest {
    // The UUID of the ModelVariant to return. Bound to the
    // {model_variant_uuid} segment of the REST path
    // (GET /v1/modelvariants/{model_variant_uuid}).
    string model_variant_uuid = 1;
}

// Request message for OctomizerService.ListModelVariants.
message ListModelVariantsRequest {
    // The model UUID for which to list model variants. Bound to the
    // {model_uuid} segment of the REST path (GET /v1/models/{model_uuid}/variants).
    string model_uuid = 1;
    // The number of results to return in the response.
    uint32 page_size = 2;
    // The page token provided by a previous request to ListModelVariants.
    string page_token = 3;
}

// Response message for OctomizerService.ListModelVariants.
message ListModelVariantsResponse {
    // The list of ModelVariants.
    repeated ModelVariant model_variants = 1;
    // Token used to request the next page of results (pass it as
    // ListModelVariantsRequest.page_token).
    string next_page_token = 2;
}

// Request message for OctomizerService.UpdateModelVariant.
message UpdateModelVariantRequest {
    // The ModelVariant resource to update. Its uuid identifies the target
    // ModelVariant and is bound to the REST path
    // (PATCH /v1/modelvariants/{model_variant.uuid}).
    ModelVariant model_variant = 1;

    // The fields to update in the server's ModelVariant definition.
    google.protobuf.FieldMask update_mask = 2;

    // If true, repeated fields specified by the update_mask will overwrite values in the
    // target ModelVariant, rather than append.
    bool overwrite = 3;
}

// Request message for OctomizerService.CreateWorkflow.
message CreateWorkflowRequest {
    // Specification for the Workflow to create. This field is the HTTP body of
    // the REST binding, and its model_uuid is bound to the REST path
    // (POST /v1/models/{workflow.model_uuid}/workflows).
    Workflow workflow = 1;
}

// Request message for OctomizerService.GetWorkflow.
//
// NOTE(review): the only field uses number 2 and no field 1 is declared.
// Field numbers are part of the wire contract, so do not renumber. If a
// field 1 existed previously, add `reserved 1;` — confirm against history.
message GetWorkflowRequest {
    // The UUID of the Workflow to return. Bound to the {workflow_uuid}
    // segment of the REST path (GET /v1/workflows/{workflow_uuid}).
    string workflow_uuid = 2;
}

// Request message for OctomizerService.ListWorkflows.
message ListWorkflowsRequest {
    // The model UUID for the Workflows to list. Bound to the {model_uuid}
    // segment of the REST path (GET /v1/models/{model_uuid}/workflows).
    string model_uuid = 1;
    // The number of results to return in the response.
    uint32 page_size = 2;
    // The page token provided by a previous request to ListWorkflows.
    string page_token = 3;
}

// Response message for OctomizerService.ListWorkflows.
message ListWorkflowsResponse {
    // The list of Workflows.
    repeated Workflow workflows = 1;
    // Token used to request the next page of results (pass it as
    // ListWorkflowsRequest.page_token).
    string next_page_token = 2;
}

// Request message for OctomizerService.ListAllWorkflows. Every populated
// filter below is combined conjunctively (AND) with the others.
message ListAllWorkflowsRequest {
    // Beginning of time range for Workflows to be returned.
    // A Workflow must have been created on or after this time.
    // If unset, defaults to the zero timestamp.
    google.protobuf.Timestamp start_time = 1;

    // End of time range for Workflows to be returned.
    // A Workflow must have been created or updated before this time.
    // If unset, defaults to the infinite timestamp.
    google.protobuf.Timestamp end_time = 2;

    // If a non-empty string, return only Workflows matching the given
    // model UUID. Conjunctive with other filter predicates.
    string model_uuid = 3;

    // If a non-empty string, return only Workflows matching the given
    // account UUID. Conjunctive with other filter predicates.
    string account_uuid = 4;

    // If a non-empty string, return only Workflows matching the given
    // user UUID. Conjunctive with other filter predicates.
    string user_uuid = 5;

    // Only list Workflows matching any of the given states (the states are
    // OR-ed with each other). As a whole the list of states is conjunctive
    // with other filter predicates.
    repeated WorkflowStatus.WorkflowState state = 6;

    // The number of results to return in the response.
    // Note: OCTO-1466 page_size and state do not work together. When
    // filtering by state, avoid specifying a page_size, and vice-versa.
    uint32 page_size = 7;
    // The page token provided by a previous request to ListAllWorkflows.
    string page_token = 8;
}

// Response message for OctomizerService.ListAllWorkflows.
message ListAllWorkflowsResponse {
    // The list of Workflows matching the request's filters.
    repeated octoml.octomizer.v1.Workflow workflows = 1;
    // Token used to request the next page of results (pass it as
    // ListAllWorkflowsRequest.page_token).
    string next_page_token = 2;
}

// Request message for OctomizerService.GetUsage.
message GetUsageRequest {
    // The start time for the workflow search.
    google.protobuf.Timestamp start_time = 1;
    // The end time for the workflow search.
    google.protobuf.Timestamp end_time = 2;
    // The UUID of the Account to check usage against.
    string account_uuid = 3;
}

// One usage entry as returned in GetUsageResponse.user_usage. Each entry
// groups autotune instances that share the user, target, state and thread
// count given below (see autotune_count).
message HardwareUsage {
    // UUID corresponding to the user who started this usage instance.
    string user_uuid = 1;
    // Given name corresponding to the user who started this usage instance.
    string given_name = 2;
    // Family name corresponding to the user who started this usage instance.
    string family_name = 3;
    // Hardware target of this usage instance.
    string target = 4;
    // State of this usage instance (such as Running or Failed).
    WorkflowStatus.WorkflowState state = 5;
    // Corresponds to the number of threads allocated to this instance.
    // Currently the oneof has a single member.
    oneof num_threads {
        uint32 tvm_num_threads = 6;
    }
    // Number of autotune instances which match the properties above.
    int32 autotune_count = 7;
}

// Response message for OctomizerService.GetUsage.
message GetUsageResponse {
    // The usage data for the account, broken down by user.
    repeated HardwareUsage user_usage = 1;
}

// Request message for OctomizerService.CancelWorkflow.
message CancelWorkflowRequest {
    // The UUID of the Workflow to cancel. Bound to the {workflow_uuid}
    // segment of the REST path (POST /v1/workflows/{workflow_uuid}:cancel).
    string workflow_uuid = 1;
}

// Request message for OctomizerService.GetUser.
message GetUserRequest {
    // The UUID of the User to return. Bound to the {user_uuid} segment of the
    // REST path (GET /v1/users/{user_uuid}).
    string user_uuid = 1;
}

// Request message for OctomizerService.GetAccount.
message GetAccountRequest {
    // The UUID of the Account to return. Bound to the {account_uuid} segment
    // of the REST path (GET /v1/accounts/{account_uuid}).
    string account_uuid = 1;
}

// Request message for OctomizerService.GetAccountUsers.
message GetAccountUsersRequest {
    // The UUID of the account whose users to return. Bound to the
    // {account_uuid} segment of the REST path
    // (GET /v1/accounts/{account_uuid}/users).
    string account_uuid = 1;
    // The number of users to return in the response.
    uint32 page_size = 2;
    // The token from the previous invocation of GetAccountUsers.
    string page_token = 3;
}

// Response message for OctomizerService.GetAccountUsers.
message GetAccountUsersResponse {
    // Users that are a part of the account.
    repeated User members = 1;
    // Token used to return the next page of results (pass it as
    // GetAccountUsersRequest.page_token).
    string next_page_token = 2;
}
  
// Request message for OctomizerService.CreateUser. The whole message is the
// HTTP body of the REST binding (POST /v1/users, body "*").
message CreateUserRequest {
    // Specification of the User to create.
    User user = 1;
}

// Request message for OctomizerService.UpdateUser.
message UpdateUserRequest {
    // The user to update. Its uuid identifies the target User and is bound to
    // the REST path (PATCH /v1/users/{user.uuid}).
    User user = 1;

    // The fields to update in the server's User definition.
    google.protobuf.FieldMask update_mask = 2;

    // If true, repeated fields specified by the update_mask will overwrite values in the
    // target user, rather than append.
    bool overwrite = 3;
}

// Request message for OctomizerService.DeleteUser.
message DeleteUserRequest {
    // The UUID of the user to delete. Bound to the {user_uuid} segment of the
    // REST path (DELETE /v1/users/{user_uuid}).
    string user_uuid = 1;
}

// Request message for OctomizerService.UpdateAccount.
message UpdateAccountRequest {
    // The Account to update. Its uuid identifies the target Account and is
    // bound to the REST path (PATCH /v1/accounts/{account.uuid}).
    Account account = 1;

    // The fields to update in the server's Account definition.
    google.protobuf.FieldMask update_mask = 2;

    // If true, repeated fields specified by the update_mask will overwrite values in the
    // target account, rather than append.
    bool overwrite = 3;
}

// Request message for OctomizerService.GetHardwareTargets.
message GetHardwareTargetsRequest {
    // The UUID of the account whose targets to return. Bound to the
    // {account_uuid} segment of the REST path
    // (GET /v1/hardware_targets/{account_uuid}).
    string account_uuid = 1;
    // The number of targets to return in the response.
    uint32 page_size = 2;
    // The token from the previous invocation of GetHardwareTargets.
    string page_token = 3;
}

// Response message for OctomizerService.GetHardwareTargets.
message GetHardwareTargetsResponse {
    // HardwareTargets that are part of the account.
    repeated HardwareTarget hardware_targets = 1;
    // Token used to return the next page of results (pass it as
    // GetHardwareTargetsRequest.page_token).
    string next_page_token = 2;
}

// Request message for OctomizerService.CreateIngestModel. The whole message
// is the HTTP body of the REST binding (POST /v1/ingest_model, body "*").
message CreateIngestModelRequest {
    // Metadata for the model to create.
    ModelUpload model_upload = 1;
}

// Request message for OctomizerService.GetIngestModelStatus.
message GetIngestModelStatusRequest {
    // The UUID of the model ingest job to return. Bound to the {uuid} segment
    // of the REST path (GET /v1/ingest_model/{uuid}).
    string uuid = 1;
}

// Request message for OctomizerService.ListIngestModelStatus.
message ListIngestModelStatusRequest {
    // The number of results to return in the response.
    // If unset, defaults to 100.
    uint32 page_size = 1;
    // The page token provided by a previous request to ListIngestModelStatus.
    string page_token = 2;
}

// Response message for OctomizerService.ListIngestModelStatus.
message ListIngestModelStatusResponse {
    // The list of IngestModelStatus messages, one per model ingest job.
    repeated IngestModelStatus statuses = 1;
    // Token used to request the next page of results (pass it as
    // ListIngestModelStatusRequest.page_token).
    string next_page_token = 2;
}

Users

The User and Account messages provide details on individual users and accounts.

// Protobuf definition for User and Account.

syntax = "proto3";

package octoml.octomizer.v1;

option go_package = "octoml.ai/api_rest_proxy/gen/octoml/octomizer/v1";

// Represents a User, containing both profile information and a reference to
// the Account that this User belongs to.
message User {
  // The UUID for this user.
  string uuid = 1;
  // The user's given (first) name.
  string given_name = 2;
  // The user's family (last) name.
  string family_name = 3;
  // The user's email address.
  string email = 4;
  // The UUID of the Account to which this User belongs.
  string account_uuid = 5;
  // Whether the user is active (not offboarded).
  bool active = 6;
  // The user's permissions within api-server.
  Permissions permissions = 7;
}

// Represents an Account, which is a set of Users. Only users within the
// same Account may share access to resources like Models and Workflows.
message Account {
  // Deleted field; the number is reserved to prevent field number reuse.
  reserved 3;

  // The UUID for this Account.
  string uuid = 1;
  // The human-readable display name for this Account.
  string display_name = 2;

  // Together with selective_support_models, encodes the account's privacy
  // level, which determines which customer models are accessible to OctoML
  // support staff. Account privacy levels are:
  //   - HighPrivacy (encoded by high_privacy=true and selective_support_models=[])
  //   - SelectivePrivacy (encoded by high_privacy=true and selective_support_models is nonempty)
  //   - HighSupport (encoded by high_privacy=false)
  // See https://www.notion.so/octoml/Remora-MLSys-Access-to-Octomizer-Epic-3482176dfe7e4cc4b1f7739685479713
  // for more details about account privacy.
  bool high_privacy = 4;

  // Specific models that can be viewed by OctoML support staff.
  // NOTE(review): presumably a list of Model UUIDs — confirm.
  repeated string selective_support_models = 5;
}

// A permission for non-Admin Users within api-server.
message Permissions {
  // Whether user is an admin of their own account. This grants the ability to create and
  // modify users in the same account.
  bool is_own_account_admin = 1;
  // Deprecated field; the number stays reserved.
  reserved 2;
  // Controls the ability to trigger octomizations.
  // NOTE(review): the previous comment said this "revokes" the ability, which
  // reads oddly for a field named can_octomize — confirm which boolean value
  // grants access.
  bool can_octomize = 3;
  // Deprecated field; the number stays reserved.
  reserved 4;
}

AccessToken

The AccessToken message represents an API access token created by a user (initially via the web UI) for access to the API surface.

// Protobuf definition for AccessTokens.

syntax = "proto3";

package octoml.octomizer.v1;

option go_package = "octoml.ai/api_rest_proxy/gen/octoml/octomizer/v1";

import "google/protobuf/timestamp.proto";

// Represents metadata for an AccessToken, which provides a mechanism for users to
// authenticate using a secret string. The secret itself is not contained in the
// AccessToken message; rather, it is returned only on a call to CreateAccessToken.
message AccessToken {
    // UUID for this AccessToken. Not specified by clients.
    string uuid = 1;

    // Optional user-provided description for this token.
    string description = 2;

    // The timestamp when this AccessToken was created. Not specified by clients.
    google.protobuf.Timestamp create_time = 3;

    // The timestamp when this AccessToken will expire. If not set, the token
    // will never expire. When creating a token, this value must be in the future.
    google.protobuf.Timestamp expiry_time = 4;
}

DataRef

All data objects managed by Octomizer (model uploads, packages, etc.) have an associated DataRef which provides access to the data content via a downloadable URL. DataRefs are immutable once created, and cannot be individually deleted (to prevent other objects which refer to a DataRef from losing access).

// Protobuf definition for DataRef.

syntax = "proto3";

package octoml.octomizer.v1;

option go_package = "octoml.ai/api_rest_proxy/gen/octoml/octomizer/v1";

import "google/protobuf/timestamp.proto";

// Represents a reference to a persistent data object stored by Octomizer.
// Once a data object has been created and a DataRef associated with it, both the DataRef
// and the object contents are immutable.
//
// For client-created DataRefs, the comments below state which fields the client
// must populate (content_type, sha256) and which it must leave unset (uuid, url).
message DataRef {
    // UUID of this DataRef. Clients must not specify this field when creating DataRefs.
    string uuid = 1;

    // MIME content-type of the data. Note that this represents the syntactic
    // format of the data object (e.g., application/octet-stream), not the
    // semantic meaning (e.g., model weights or metrics data). Semantic meaning
    // for a given data object is determined by the object referencing this
    // DataRef. Client-created DataRefs must populate this field.
    string content_type = 2;

    // Lowercase SHA256 hash of the contents of the data object. Used for validating data
    // integrity. When a DataRef is created by a client, this field must be provided
    // to indicate the expected SHA256 hash of the object to be uploaded. It is also
    // required later during the upload process (as the x-goog-content-sha256 header;
    // see CreateDataRefResponse.upload_url).
    // Set to empty string to indicate an empty object.
    string sha256 = 3;

    // The size of the data object, in bytes.
    uint64 size = 4;

    // A URL where the contents of the DataRef may be retrieved via an HTTP GET request.
    // Clients must not specify this field when creating DataRefs.
    //
    // Note that this URL may be time-limited in its validity, and may be subject to access control
    // restrictions (e.g., only valid for the same User that requested the DataRef).
    // This field will be empty for a DataRef which has no data object yet stored by the
    // system (i.e., a newly-created DataRef where the contents have not yet been uploaded
    // by the user).
    string url = 5;

    // The timestamp when this DataRef was created.
    google.protobuf.Timestamp create_time = 6;

    // User-provided name for the object. This is usually the filename.
    string filename = 7;
}

Model

A Model represents a collection of ModelVariants, all with the same input and output types and semantics. It is the top-level grouping abstraction provided by the Octomizer, with ModelVariants and Workflows nested under a given Model.

// Protobuf definition for Model and ModelVariant.

syntax = "proto3";

package octoml.octomizer.v1;

option go_package = "octoml.ai/api_rest_proxy/gen/octoml/octomizer/v1";

import "google/protobuf/timestamp.proto";
import "octoml/octomizer/v1/model_inputs.proto";

// Represents a Model, which is an abstraction that groups together related ModelVariants
// and Workflows which operate on ModelVariants.
message Model {
    // UUID for this Model.
    // Not specified by clients.
    string uuid = 1;
    // UUID of the user who created this model.
    string created_by = 2;
    // User-provided inputs for this Model. If not provided for ONNX models,
    // the Octomizer will attempt to parse for inputs.
    ModelInputs inputs = 3;
    // Name for the model. When this model is packaged, this name will be used as the module name if no
    // package name is specified.
    // Must be non-empty.
    string name = 4;
    // User-provided description.
    string description = 5;
    // Optional list of user-provided labels for this Model.
    repeated string labels = 6;
    // The timestamp when this Model was created.
    // Not specified by clients.
    google.protobuf.Timestamp create_time = 7;
    // The timestamp when this Model was updated.
    // Not specified by clients.
    google.protobuf.Timestamp update_time = 8;
    // UUID of the account that owns this model.
    string owned_by = 9;
    // The UUID of the initial ModelVariant as uploaded by the user.
    string uploaded_model_variant_uuid = 10;
    // Flag indicating that the model has been archived.
    // Archiving models is a non-destructive way of removing unwanted models from view.
    bool archived = 11;
    // Disabled field: `ModelUpload model_upload = 12;` (tracked the upload
    // information for this Model). Both the number and the name are reserved
    // so that neither can be reused with a different meaning.
    reserved 12;
    reserved "model_upload";
    // Disabled field: `string import_workflow_uuid = 13;` (UUID of the import
    // workflow). Both the number and the name are reserved so that neither can
    // be reused with a different meaning.
    reserved 13;
    reserved "import_workflow_uuid";
}

// Represents a model upload, including a reference to the model data and the
// format in which it was uploaded.
message ModelUpload {
    // The UUID of the source DataRef for this model upload.
    string source_dataref_uuid = 1;
    // Describes the format of the model upload. See ModelFormat for accepted values.
    ModelFormat source_model_format = 2;
    // Name for the model. When this model is packaged, this name will be used as the module name.
    // Must be non-empty.
    string name = 3;
    // User-provided description.
    string description = 4;
    // Optional list of user-provided labels for this Model.
    repeated string labels = 5;
}

// Accepted upload formats for models.
// Because ONNX is the zero value, a ModelFormat field that is left unset reads as ONNX.
enum ModelFormat {
    // ONNX model (also the proto3 default for an unset field).
    ONNX = 0;
    // TensorFlow SavedModel (presumably a SavedModel directory/archive — confirm).
    TF_SAVED_MODEL = 1;
    // TensorFlow GraphDef.
    TF_GRAPH_DEF = 2;
    // TFLite model.
    TFLITE = 3;
}

// Possible model runtimes. Used by ModelVariant.model_runtimes to list the
// runtimes a variant can run on.
enum ModelRuntime {
    // ONNX Runtime. This is the zero value of the enum.
    ONNXRUNTIME = 0;
    // TVM.
    TVM = 1;
}

// Represents a ModelVariant, which is a concrete instance of a given Model in a given
// format.
message ModelVariant {
    // UUID for this ModelVariant.
    // Not specified by clients.
    string uuid = 1;
    // UUID of the Model to which this ModelVariant belongs.
    string model_uuid = 2;
    // User-provided description.
    // Note that this will be populated by the Octomizer for ModelVariants generated
    // by the service, but may be edited by the user via the UpdateModelVariant RPC call.
    string description = 3;
    // Optional list of user-provided labels for this ModelVariant.
    repeated string labels = 4;
    // The timestamp when this ModelVariant was created.
    // Not specified by clients.
    google.protobuf.Timestamp create_time = 5;
    // The timestamp when this ModelVariant was last updated.
    // Not specified by clients.
    google.protobuf.Timestamp update_time = 6;
    // Unused. Tags 7 through 10 are reserved so they cannot be assigned to new fields.
    reserved 7 to 10;
    // The UUID of the Workflow used to create this ModelVariant.
    // If not set, this ModelVariant was uploaded by the user.
    string source_workflow_uuid = 11;
    // Model-format-specific configuration metadata.
    ModelFormatConfig model_format_config = 12;
    // Inputs for this ModelVariant.
    ModelInputs inputs = 13;
    // The model runtimes this model variant is capable of running on.
    repeated ModelRuntime model_runtimes = 14;
}

// Format-specific configuration for models.
// At most one member of the `model_config` oneof is set at a time.
message ModelFormatConfig {
    // Tag 4 formerly held `KerasModelConfig keras_model_config`, a member of the
    // oneof below. The `reserved` keyword is not permitted inside a oneof body,
    // but oneof members share the enclosing message's field-number space, so
    // reserving the tag and name here at message level prevents accidental reuse.
    reserved 4;
    reserved "keras_model_config";

    oneof model_config {
        // Configuration for a Relay model.
        RelayModelConfig relay_model_config = 1;
        // Configuration for an ONNX model.
        ONNXModelConfig onnx_model_config = 2;
        // Configuration for a TFLite model.
        TFLiteModelConfig tflite_model_config = 3;
        // Tag 4 (keras_model_config) is retired and reserved at message level:
        // KerasModelConfig keras_model_config = 4;
        // Configuration for a TensorFlow model.
        TensorFlowModelConfig tensor_flow_model_config = 5;

        // next_tag = 6
    }
}

// Relay model configuration data.
// The two log DataRef fields are populated by the service rather than by clients.
message RelayModelConfig {
    // UUID of the DataRef containing the Relay model data.
    string model_dataref_uuid = 1;

    // Optional UUID of the DataRef containing this model's best autotuning logs.
    // Only provided if this ModelVariant was created as the result of an autotuning Workflow.
    // Not specified by clients.
    string best_log_dataref_uuid = 2;

    // Optional UUID of the DataRef containing this model's full autotuning logs.
    // Only provided if this ModelVariant was created as the result of an autotuning Workflow.
    // Not specified by clients.
    string full_log_dataref_uuid = 3;
}

// ONNX model configuration data.
// Carried in ModelFormatConfig.onnx_model_config.
message ONNXModelConfig {
    // UUID of the DataRef containing the ONNX model data.
    string model_dataref_uuid = 1;
}

// TFLite model configuration data.
// Carried in ModelFormatConfig.tflite_model_config.
message TFLiteModelConfig {
    // UUID of the DataRef containing the TFLite model data.
    string model_dataref_uuid = 1;
}

// TensorFlow model configuration data.
// Carried in ModelFormatConfig.tensor_flow_model_config.
message TensorFlowModelConfig {
    // UUID of the DataRef containing the TensorFlow model data, stored in the
    // format named by `tensor_flow_format`.
    // NOTE(review): this comment previously read "Keras model data"; presumably a
    // leftover from the removed KerasModelConfig — confirm.
    string model_dataref_uuid = 1;

    // TensorFlow protobuf format of the model data.
    TensorFlowFormat tensor_flow_format = 2;

    // TensorFlow protobuf formats accepted by the Octomizer.
    enum TensorFlowFormat {
      // Keras. This is the zero value, so it is what an unset
      // `tensor_flow_format` reads as.
      SavedModel = 0;

      // TensorFlow v1
      GraphDef = 1;
    }
}

ModelInputs

The ModelInputs message represents a given model’s input shape and data type.

// Protobuf definitions for model inputs.

syntax = "proto3";

package octoml.octomizer.v1;

option go_package = "octoml.ai/api_rest_proxy/gen/octoml/octomizer/v1";

// The input specifications for a model: one InputField entry per model input.
message ModelInputs {
    // Input fields, each describing the name, dtype and shape of one input.
    repeated InputField input_fields = 1;
}

// Describes a single model input: its name, data type, and shape.
message InputField {
    // Name of this input.
    string input_name = 1;
    // String indicating numpy dtype of this input, for example, "float32".
    string input_dtype = 2;
    // The shape of the input, one entry per dimension. Dimensions are expected
    // to be positive, but -1 can be used as a sentinel when a dimension is
    // unknown and the user is expected to clarify it.
    repeated int32 input_shape = 3;
}

Workflow

A Workflow is a sequence of actions performed by the Octomizer on a ModelVariant. Workflows can include up to three stages: Benchmark, Autotune, and Package.

// Protobuf definitions for Workflows.

syntax = "proto3";

package octoml.octomizer.v1;

option go_package = "octoml.ai/api_rest_proxy/gen/octoml/octomizer/v1";

import "octoml/octomizer/v1/hardware.proto";
import "octoml/octomizer/v1/autotune.proto";
import "octoml/octomizer/v1/benchmark.proto";
import "octoml/octomizer/v1/package.proto";
import "octoml/octomizer/v1/error.proto";

import "google/protobuf/timestamp.proto";

// A Workflow represents a set of operations to be performed by the Octomizer.
//
// A Workflow is a pipeline with up to three stages:
//   - Autotuning
//   - Benchmarking
//   - Packaging
// Note that any of these stages can be omitted, but at least one must be specified.
// The Benchmarking stage allows multiple Benchmark jobs to be executed, using different
// parameters, for the sake of performance comparisons.
//
// Fields are not declared in tag order: reserved tag 12 appears before tags 9-11.
message Workflow {
    // The UUID for this workflow.
    // Not specified by clients.
    string uuid = 1;

    // The UUID of the Model to which this Workflow belongs.
    string model_uuid = 2;

    // The UUID for the ModelVariant which this Workflow will operate on.
    // At present we support a single ModelVariant input to a given Workflow, however,
    // Workflows operating on multiple ModelVariants may be supported in the future.
    // This ModelVariant must be associated with the model_uuid specified above.
    string model_variant_uuid = 3;

    // The current status of the Workflow.
    // Not specified by clients.
    WorkflowStatus status = 4;

    // The hardware target for the Workflow. All stages share the same hardware target.
    HardwareSpec hardware = 5;

    // If set, defines the parameters for the Autotune stage of the Workflow.
    AutotuneStageSpec autotune_stage_spec = 6;

    // If set, defines the parameters for the Benchmark stages of the Workflow.
    BenchmarkStageSpec benchmark_stage_spec = 7;

    // If set, defines the parameters for the Package stage of the Workflow.
    PackageStageSpec package_stage_spec = 8;

    // Retired field; the tag is reserved so it cannot be assigned to a new field.
    // ImportModelStageSpec import_model_stage_spec = 12;
    reserved 12;

    // The timestamp when this Workflow was created.
    // Not specified by clients.
    google.protobuf.Timestamp create_time = 9;

    // The timestamp when this Workflow was last updated.
    // Not specified by clients.
    google.protobuf.Timestamp update_time = 10;

    // UUID of the User that created this Workflow.
    string created_by = 11;

    // next = 13
}

// Represents the status of a Workflow.
// Which of `progress`, `result` and `error_details` is meaningful depends on `state`.
message WorkflowStatus {
    // The timestamp of this status update.
    google.protobuf.Timestamp timestamp = 1;
    // The possible states of a Workflow.
    enum WorkflowState {
        // The Workflow is in an unknown state. This is the zero value, so it is
        // what an unset `state` reads as.
        UNKNOWN = 0;
        // The Workflow is pending execution by the Octomizer.
        PENDING = 1;
        // The Workflow is currently running.
        RUNNING = 2;
        // The Workflow completed successfully. The `result` field will
        // contain the Workflow result.
        COMPLETED = 3;
        // The Workflow failed during execution. The `status_message` field
        // will have additional information on the failure.
        FAILED = 4;
        // The Workflow was canceled by the user.
        CANCELED = 5;
    }
    // The state of the Workflow as of `timestamp`.
    WorkflowState state = 2;

    // Stores a human-readable status message representing the state.
    // This field should not be used to store encoded data!
    string status_message = 3;

    // Progress of the Workflow. Valid when the state is RUNNING.
    WorkflowProgress progress = 4;

    // The result of the Workflow. This is only valid if state is COMPLETED.
    WorkflowResult result = 5;

    // User-facing error details for the workflow. This is only valid if the state is FAILED.
    ErrorDetails error_details = 6;

    // Retired field; the tag is reserved so it cannot be assigned to a new field.
    // Former declaration (additional failure information, only valid if state is FAILED):
    // WorkflowFailureDetails failure_details = 7;
    reserved 7;
}

// Represents the progress of a Workflow's execution as a count of completed
// steps out of a total.
message WorkflowProgress {
    // The number of Workflow steps that have completed execution.
    uint32 completed_steps = 1;
    // The total number of steps in the Workflow.
    uint32 total_steps = 2;
}

// Represents the result of a completed Workflow, with one result field per
// stage that the Workflow included.
message WorkflowResult {
    // If the Workflow included an Autotuning stage, this will store the result.
    AutotuneStageResult autotune_result = 1;

    // If the Workflow included a Benchmarking stage, this will store the results.
    BenchmarkStageResult benchmark_result = 2;

    // If the Workflow included a Package stage, this will store the result.
    PackageStageResult package_result = 3;

    // Retired field; the tag is reserved so it cannot be assigned to a new field.
    // ImportModelStageResult import_model_result = 4;
    reserved 4;
}

// DEPRECATED
// Allows additional info to be propagated in case of a failure.
// The message currently declares no fields; its only former field is retired.
message WorkflowFailureDetails {
    // Retired field; the tag is reserved so it cannot be assigned to a new field.
    // ImportModelStageFailure import_model_failure = 1;
    reserved 1;
}

EngineSpec

EngineSpec is used to specify parameters for a particular runtime engine.

// Proto definitions for Engine.
syntax = "proto3";

package octoml.octomizer.v1;

option go_package = "octoml.ai/api_rest_proxy/gen/octoml/octomizer/v1";

// Runtime engine specification.
// At most one member of the `engine_spec` oneof is set at a time.
message EngineSpec {
    // Tag 4 formerly held `KerasEngineSpec keras_engine_spec`, a member of the
    // oneof below. The `reserved` keyword is not permitted inside a oneof body,
    // but oneof members share the enclosing message's field-number space, so
    // reserving the tag and name here at message level prevents accidental reuse.
    reserved 4;
    reserved "keras_engine_spec";

    oneof engine_spec {
        // Parameters for the TVM engine.
        TVMEngineSpec tvm_engine_spec = 1;
        // Parameters for the ONNXRuntime engine.
        ONNXRuntimeEngineSpec onnxruntime_engine_spec = 2;
        // Parameters for the TFLite engine.
        TFLiteEngineSpec tflite_engine_spec = 3;
        // Tag 4 (keras_engine_spec) is retired and reserved at message level:
        // KerasEngineSpec keras_engine_spec = 4;
        // Parameters for the TensorFlow engine.
        TensorFlowEngineSpec tensor_flow_engine_spec = 5;

        // next_tag = 6
    }
}

// Client-provided TVM engine spec.
//
// NOTE(review): several fields document non-zero defaults. These proto3 scalars
// carry no presence information, so an unset field is indistinguishable from an
// explicit 0/false on the wire; presumably the service applies the documented
// defaults — confirm how an explicit 0/false is treated.
message TVMEngineSpec {
    // Relay optimization level. Defaults to 3.
    uint32 relay_opt_lvl = 1;
    // Enable the debug runtime for per-layer metrics. Defaults to true.
    bool enable_profiler = 2;
    // Client-provided number of threads TVM is to use for inference.
    // Passing 0 will let TVM decide. Defaults to 0.
    uint32 tvm_num_threads = 3;
    // Semver (pip-type) version specifier for TVM.
    string tvm_version = 4;
}

// ONNXRuntime engine spec.
// Declares no fields; setting EngineSpec.onnxruntime_engine_spec selects this engine.
message ONNXRuntimeEngineSpec {}

// TFLite engine spec.
// Declares no fields; setting EngineSpec.tflite_engine_spec selects this engine.
message TFLiteEngineSpec {}

// TensorFlow engine spec.
// Declares no fields; setting EngineSpec.tensor_flow_engine_spec selects this engine.
message TensorFlowEngineSpec {}

HardwareSpec

HardwareSpec is used to specify the hardware that will be used for a given Workflow.

// Protobuf definitions for Hardware.

syntax = "proto3";

package octoml.octomizer.v1;

option go_package = "octoml.ai/api_rest_proxy/gen/octoml/octomizer/v1";

// The client-provided hardware specification for a job request.
// Used as the hardware target of a Workflow (Workflow.hardware).
message HardwareSpec {
    // The hardware platform, given as a string identifier. The list of
    // supported values for this field is provided in the Octomizer documentation.
    string platform = 1;
}

BenchmarkStageSpec

BenchmarkStageSpec provides parameters for benchmarking stages.

// Protobuf definitions for benchmark stages in a workflow.

syntax = "proto3";

package octoml.octomizer.v1;

option go_package = "octoml.ai/api_rest_proxy/gen/octoml/octomizer/v1";

import "octoml/octomizer/v1/engine.proto";
import "octoml/octomizer/v1/model_inputs.proto";

// Parameters for a Benchmarking stage in a Workflow.
message BenchmarkStageSpec {
    // Engine spec to be used by this stage.
    EngineSpec engine = 1;
    // Number of benchmarking trials to execute.
    uint32 num_trials = 2;
    // The number of benchmarks to run per trial.
    uint32 runs_per_trial = 3;
    // Model inputs for this stage.
    ModelInputs model_inputs = 4;
}

// Result for a Benchmarking stage.
// Stored in WorkflowResult.benchmark_result.
message BenchmarkStageResult {
    // Metrics returned by the Benchmark stage.
    BenchmarkMetrics metrics = 1;
}

// Metrics from a benchmark job.
// Fields are not declared in tag order: tag 4 appears before tag 3.
message BenchmarkMetrics {
    // Mean of runtimes in milliseconds.
    float runtime_ms_mean = 1;
    // Standard deviation of runtimes in milliseconds.
    float runtime_ms_std = 2;
    // Optional UUID for dataref of the full metrics for this benchmark.
    string full_metrics_dataref_uuid = 4;
    // Optional compilation time of the model in milliseconds.
    // This will be set to 0.0 if not measured.
    float compile_ms = 3;
}

AutotuneStageSpec

AutotuneStageSpec provides parameters for autotuning stages.

// Protobuf definitions for autotune stages in a workflow.

syntax = "proto3";

package octoml.octomizer.v1;

option go_package = "octoml.ai/api_rest_proxy/gen/octoml/octomizer/v1";

import "octoml/octomizer/v1/engine.proto";
import "octoml/octomizer/v1/model_inputs.proto";

// Tuning parameters for the AutoTVM tuner.
// Selected by setting the `autotvm` member of the AutotuneStageSpec `tuner` oneof.
message AutoTVMSpec {
  // Number of configurations to try for each kernel. If this kernel has
  // kernel_trials number of cached configurations, they will be retrieved and
  // used instead of tuning from scratch. If this kernel has fewer than
  // kernel_trials cached configurations, the n retrieved configurations will
  // be used to seed the autotuner cost model for at least (kernel_trials - n)
  // further tuning trials.
  uint32 kernel_trials = 1;
  // Threshold to stop early if this many trials have elapsed without finding a
  // better configuration.
  uint32 early_stopping_threshold = 2;
  // Additional trials to perform beyond those which are cached from previous
  // autotunes. These trials will always be tuned from scratch. The overall
  // number of trials that are tuned is: max(exploration_trials, kernel_trials -
  // cached_trials) where cached trials is spec'd in the `kernel_trials` doc.
  uint32 exploration_trials = 3;
  // When tuning needs to be performed, this specifies how many random cached
  // trials to additionally use for seeding the autotuner cost model, on top of
  // the cached trials spec'd in the `kernel_trials` doc.
  uint32 random_trials = 4;
}

// Tuning parameters for the AutoScheduler tuner.
// Selected by setting the `autoscheduler` member of the AutotuneStageSpec `tuner` oneof.
message AutoSchedulerSpec {
  // Controls adaptive training; see the `adaptive_training` field.
  enum AdaptiveTrainingBehavior {
      // Use the default behavior. This is the zero value, so it is what an
      // unset `adaptive_training` reads as.
      DEFAULT = 0;
      // Disable adaptive training.
      DISABLED = 1;
      // Enable adaptive training.
      ENABLED = 2;
    }

  // The number of measurement trials. The search policy measures
  // trials_per_kernel schedules in total and returns the best one among them.
  // With trials_per_kernel == 0, the policy will do the schedule search but
  // won't involve measurement. This can be used to get a runnable schedule
  // quickly without auto-tuning.
  uint32 trials_per_kernel = 1;

  // Stop the tuning early if there is no improvement after this many measurements.
  uint32 early_stopping_threshold = 2;

  // Whether to reduce the amount of model retraining during tuning.
  AdaptiveTrainingBehavior adaptive_training = 3;

  // Stop tuning a kernel if there is no improvement after this many measurements.
  uint32 early_stopping_threshold_per_kernel = 4;
}

// Parameters for an Autotuning stage in a Workflow.
// The top-level tuning knobs (tags 2-5) are deprecated in favor of the
// equivalent fields on the AutoTVMSpec carried in the `tuner` oneof.
message AutotuneStageSpec {
  // Engine spec to be used by this stage.
  EngineSpec engine = 1;

  // Deprecated - Set tuner.autotvm.kernel_trials
  uint32 kernel_trials = 2 [ deprecated = true ];

  // Deprecated - Set tuner.autotvm.early_stopping_threshold
  uint32 early_stopping_threshold = 3 [ deprecated = true ];

  // Deprecated - Set tuner.autotvm.exploration_trials
  uint32 exploration_trials = 4 [ deprecated = true ];

  // Deprecated - Set tuner.autotvm.random_trials
  uint32 random_trials = 5 [ deprecated = true ];

  // Model inputs to use for this stage.
  ModelInputs model_inputs = 6;

  // The tuner to use and its parameters. At most one member is set.
  oneof tuner {
    // Tune with AutoTVM using these parameters.
    AutoTVMSpec autotvm = 7;
    // Tune with AutoScheduler using these parameters.
    AutoSchedulerSpec autoscheduler = 8;
  }
}

// Result for an Autotuning stage.
// Stored in WorkflowResult.autotune_result.
message AutotuneStageResult {
  // UUID of the model variant produced from the Autotune stage.
  string model_variant_uuid = 1;
}

PackageStageSpec

PackageStageSpec provides parameters for packaging stages.

// Protobuf definitions for package stages in a workflow.

syntax = "proto3";

package octoml.octomizer.v1;

option go_package = "octoml.ai/api_rest_proxy/gen/octoml/octomizer/v1";

import "octoml/octomizer/v1/engine.proto";
import "octoml/octomizer/v1/model_inputs.proto";

// Parameters for a Packaging stage in a Workflow.
message PackageStageSpec {
    // The type of package to produce. This also determines which member of
    // PackageStageResult.package_output holds the result.
    PackageType package_type = 1;
    // Engine spec on which to package.
    EngineSpec engine = 2;
    // Model inputs for this stage.
    ModelInputs model_inputs = 3;
    // The name of the package.
    // NOTE(review): the Model.name comment says the model name is used as the
    // module name when no package name is specified; presumably that refers to
    // leaving this field empty — confirm.
    string package_name = 4;
}

// Results for a Package stage.
// Stored in WorkflowResult.package_result.
message PackageStageResult {
    // The type of result depends on the package_type field of the PackageStageSpec.
    oneof package_output {
        // Result when package_type == PYTHON_PACKAGE.
        PythonPackageResult python_package_result = 1;
        // Result when package_type == LINUX_SHARED_OBJECT.
        LinuxSharedObjectResult linux_shared_object_result = 2;
    }
}

// Package type enum. Selects the kind of package a Package stage produces.
enum PackageType {
    // A Python package. Only linux distributions are currently available.
    // This is the zero value, so it is what an unset package_type reads as.
    PYTHON_PACKAGE = 0;
    // A .so package for linux.
    LINUX_SHARED_OBJECT = 1;
}

// Results for a PackageStage with package_type == PYTHON_PACKAGE.
// Carried in PackageStageResult.python_package_result.
message PythonPackageResult {
    // The DataRef UUID for the Python package produced by the Package stage.
    string package_dataref_uuid = 1;
}

// Results for a PackageStage with package_type == LINUX_SHARED_OBJECT.
// Carried in PackageStageResult.linux_shared_object_result.
message LinuxSharedObjectResult {
    // The DataRef UUID for the .so package produced by the Package stage.
    string package_dataref_uuid = 1;
}