Use the axum crate with tokio to create a high-performance, async REST API that loads your ML model and exposes prediction endpoints.
[dependencies]
# Web framework: provides Router, routing::post, and the Json extractor/response.
axum = "0.7"
# Async runtime; "full" enables the net types and the #[tokio::main] macro used below.
tokio = { version = "1", features = ["full"] }
# (De)serialization framework; "derive" enables #[derive(Serialize, Deserialize)].
serde = { version = "1", features = ["derive"] }
# JSON encoding — axum's Json extractor/response relies on it.
serde_json = "1"
use axum::{routing::post, Json, Router};
use serde::{Deserialize, Serialize};
use std::net::SocketAddr;
/// Request body accepted by `POST /predict`.
#[derive(Deserialize)]
struct InputData {
    /// Feature vector supplied by the client.
    features: Vec<f32>,
}
/// Response body returned by `POST /predict`.
#[derive(Serialize)]
struct Prediction {
    /// Prediction score produced by the handler.
    result: f32,
}
/// Handler for `POST /predict`: deserializes the JSON request body into
/// [`InputData`] and responds with a JSON [`Prediction`].
///
/// NOTE(review): inference is still a stub — the input features are not
/// consumed and a fixed score is returned until a real model is wired in.
async fn predict(Json(input): Json<InputData>) -> Json<Prediction> {
    // Explicitly discard the input to silence the unused-binding warning;
    // replace this with real model inference over `input.features`.
    let _ = input;
    let placeholder = Prediction { result: 0.95 };
    Json(placeholder)
}
/// Entry point: builds the router, binds the TCP listener on 127.0.0.1:3000,
/// and serves the API until the process is terminated.
///
/// # Errors
/// Returns an error (instead of panicking via `unwrap`) when the address
/// cannot be bound or the server fails while running.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Single route: POST /predict -> `predict` handler.
    let app = Router::new().route("/predict", post(predict));
    let addr = SocketAddr::from(([127, 0, 0, 1], 3000));
    // Bind first, then announce: the original printed "Listening" before the
    // bind, so the message appeared even when binding was about to fail.
    let listener = tokio::net::TcpListener::bind(addr).await?;
    println!("Listening on {addr}");
    axum::serve(listener, app).await?;
    Ok(())
}
- Create a new Rust project and add the dependencies listed in the first code block to your `Cargo.toml`.
- Replace the contents of `src/main.rs` with the server code provided in the second code block.
- Run the server with `cargo run` and send a POST request to `http://127.0.0.1:3000/predict` with a JSON body such as `{"features": [1.0, 2.0, 3.0]}`.