Using quantization
use orpheus::prelude::*;
/// Sends one chat prompt to the `qwen/qwen3-32b` model, restricting the
/// request to the `Fp8` quantization, and prints the reply to stdout.
///
/// # Panics
///
/// Panics if any `unwrap()` below fails: building the client, sending the
/// request, or reading the content of the response.
fn main() {
    // NOTE(review): `from_env` presumably reads the API credentials from the
    // process environment — confirm against the crate documentation.
    let client = Orpheus::from_env().unwrap();

    let response = client
        .chat("Who is the greatest general of all time?")
        .model("qwen/qwen3-32b")
        // Allow only providers that serve 8-bit float (FP8) quantizations.
        .with_preferences(|prefs| prefs.quantizations([Quantization::Fp8]))
        .send()
        .unwrap();

    let answer = response.content().unwrap();
    println!("Model says: {}", answer);
}
Last updated
Was this helpful?