2023-05-29 11:46:41 +00:00
|
|
|
use crate::sql::common::{closeparentheses, commas, openparentheses};
|
|
|
|
use crate::sql::error::IResult;
|
2023-06-19 18:41:13 +00:00
|
|
|
use crate::sql::Error::Parser;
|
2023-05-29 11:46:41 +00:00
|
|
|
use nom::branch::alt;
|
|
|
|
use nom::bytes::complete::tag_no_case;
|
|
|
|
use nom::combinator::map;
|
2023-06-19 18:41:13 +00:00
|
|
|
use nom::number::complete::recognize_float;
|
|
|
|
use nom::Err::Failure;
|
2023-05-29 11:46:41 +00:00
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
use std::fmt;
|
2023-06-19 18:41:13 +00:00
|
|
|
use std::hash::{Hash, Hasher};
|
2023-05-29 11:46:41 +00:00
|
|
|
|
2023-06-19 18:41:13 +00:00
|
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
2023-05-29 11:46:41 +00:00
|
|
|
pub enum Scoring {
|
|
|
|
Bm {
|
2023-06-19 18:41:13 +00:00
|
|
|
k1: f32,
|
|
|
|
b: f32,
|
2023-05-29 11:46:41 +00:00
|
|
|
}, // BestMatching25
|
|
|
|
Vs, // VectorSearch
|
|
|
|
}
|
|
|
|
|
2023-06-19 18:41:13 +00:00
|
|
|
// Marker impl: the hand-written `PartialEq` for `Scoring` compares the `f32`
// fields by bit pattern (`to_bits`), so every value is equal to itself.
impl Eq for Scoring {}
|
|
|
|
|
|
|
|
impl PartialEq for Scoring {
|
|
|
|
fn eq(&self, other: &Self) -> bool {
|
|
|
|
match (self, other) {
|
|
|
|
(
|
|
|
|
Scoring::Bm {
|
|
|
|
k1,
|
|
|
|
b,
|
|
|
|
},
|
|
|
|
Scoring::Bm {
|
|
|
|
k1: other_k1,
|
|
|
|
b: other_b,
|
|
|
|
},
|
|
|
|
) => k1.to_bits() == other_k1.to_bits() && b.to_bits() == other_b.to_bits(),
|
|
|
|
(Scoring::Vs, Scoring::Vs) => true,
|
|
|
|
_ => false,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Hash for Scoring {
|
|
|
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
|
|
|
match self {
|
|
|
|
Scoring::Bm {
|
|
|
|
k1,
|
|
|
|
b,
|
|
|
|
} => {
|
|
|
|
k1.to_bits().hash(state);
|
|
|
|
b.to_bits().hash(state);
|
|
|
|
}
|
|
|
|
Scoring::Vs => 0.hash(state),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-29 11:46:41 +00:00
|
|
|
impl Default for Scoring {
|
|
|
|
fn default() -> Self {
|
|
|
|
Self::Bm {
|
2023-06-19 18:41:13 +00:00
|
|
|
k1: 1.2,
|
|
|
|
b: 0.75,
|
2023-05-29 11:46:41 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl fmt::Display for Scoring {
|
|
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
|
|
match self {
|
|
|
|
Self::Bm {
|
|
|
|
k1,
|
|
|
|
b,
|
2023-06-19 18:41:13 +00:00
|
|
|
} => write!(f, "BM25({},{})", k1, b),
|
2023-05-29 11:46:41 +00:00
|
|
|
Self::Vs => f.write_str("VS"),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn scoring(i: &str) -> IResult<&str, Scoring> {
|
|
|
|
alt((map(tag_no_case("VS"), |_| Scoring::Vs), |i| {
|
|
|
|
let (i, _) = tag_no_case("BM25")(i)?;
|
|
|
|
let (i, _) = openparentheses(i)?;
|
2023-06-19 18:41:13 +00:00
|
|
|
let (i, k1) = recognize_float(i)?;
|
|
|
|
let k1 = k1.parse::<f32>().map_err(|_| Failure(Parser(i)))?;
|
2023-05-29 11:46:41 +00:00
|
|
|
let (i, _) = commas(i)?;
|
2023-06-19 18:41:13 +00:00
|
|
|
let (i, b) = recognize_float(i)?;
|
|
|
|
let b = b.parse::<f32>().map_err(|_| Failure(Parser(i)))?;
|
2023-05-29 11:46:41 +00:00
|
|
|
let (i, _) = closeparentheses(i)?;
|
|
|
|
Ok((
|
|
|
|
i,
|
|
|
|
Scoring::Bm {
|
|
|
|
k1,
|
|
|
|
b,
|
|
|
|
},
|
|
|
|
))
|
|
|
|
}))(i)
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// `BM25(k1,b)` parses and is rendered back with `f32` display formatting.
    #[test]
    fn scoring_bm_25() {
        let parsed = scoring("BM25(1.0,0.6)");
        assert!(parsed.is_ok());
        let (_, out) = parsed.unwrap();
        assert_eq!("BM25(1,0.6)", out.to_string())
    }

    /// `VS` parses and is rendered back unchanged.
    #[test]
    fn scoring_vs() {
        let parsed = scoring("VS");
        assert!(parsed.is_ok());
        let (_, out) = parsed.unwrap();
        assert_eq!("VS", out.to_string())
    }
}
|