Add the tokenizers crate to your Cargo.toml, import it, and call Tokenizer::from_file to load a model before encoding text.
use tokenizers::Tokenizer;
/// Loads a tokenizer from `path/to/tokenizer.json`, encodes a sample sentence,
/// and prints the resulting token ids.
///
/// # Errors
///
/// Propagates the error from `Tokenizer::from_file` (e.g. the file is missing
/// or cannot be parsed) or from `Tokenizer::encode`.
fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    // The tokenizers crate reports failures as `Box<dyn Error + Send + Sync>`.
    // `?` cannot convert that into a plain `Box<dyn Error>` (there is no `From`
    // impl for it), so the return type carries the same `Send + Sync` bounds.
    let tokenizer = Tokenizer::from_file("path/to/tokenizer.json")?;

    // The second argument is `add_special_tokens`; `false` leaves them out.
    let encoding = tokenizer.encode("Hello, world!", false)?;

    println!("Tokens: {:?}", encoding.get_ids());
    Ok(())
}
Add this to your Cargo.toml:
[dependencies]
tokenizers = "0.19"