Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: add regex parser #4

Merged
merged 6 commits into from
Dec 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "data-exporter"
version = "0.2.1"
version = "0.3.0"
edition = "2021"

[dependencies]
Expand Down
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,16 @@ labels: [<string>]
# field to extract as value, it is required to set either this or `value` in `metric_config`
value: <string>
```
#### regex
```
type: regex

# regex pattern with named captures
pattern: <regex>

# named captures to extract as labels
labels: [<string>]

# named capture to extract as value, it is required to set either this or `value` in `metric_config`
value: <string>
```
13 changes: 13 additions & 0 deletions examples/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,16 @@ metrics:
labels:
- repo
value: stars
- name: github_stargazers
help: Stargazers per repository
targets:
- type: http
url: https://github.com/fredr?tab=repositories&type=source
parser:
type: regex
pattern: (?s:<a [^>]+?href="/(?P<repo>fredr/[^/]+?)/stargazers">.*?(?P<stars>[\d]+)\W+</a>)
labels:
- repo
value: stars


80 changes: 52 additions & 28 deletions src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ enum Parser {
labels: Vec<String>,
value: Option<String>,
},
Regex {
pattern: String,
labels: Vec<String>,
value: Option<String>,
},
}

#[derive(Deserialize)]
Expand All @@ -25,7 +30,7 @@ struct Metric {
help: String,
value: Option<f64>,
targets: Vec<Target>,
pipeline_stages: Vec<PipelineStage>,
pipeline_stages: Option<Vec<PipelineStage>>,
parser: Parser,
}

Expand All @@ -52,33 +57,52 @@ pub fn parse(path: String) -> serde_yaml::Result<crate::DataMetrics> {
.metrics
.iter()
.map(|m| {
let (parser, labels) = match &m.parser {
Parser::Json { labels, value } => (
crate::parsers::json::JsonParser::new(labels.to_vec(), value.to_owned()),
labels,
),
};
let pipeline_stages = m
.pipeline_stages
.iter()
.map(
|s| -> Box<dyn crate::pipeline_stages::PipelineStage + Send + Sync> {
match s {
PipelineStage::Jq { query } => {
Box::new(crate::pipeline_stages::jq::Stage {
expression: query.clone(),
})
}
PipelineStage::Regex { pattern, replace } => {
Box::new(crate::pipeline_stages::regex::Stage {
regex: regex::Regex::new(pattern).unwrap(),
replace: replace.to_string(),
})
let (parser, labels): (Box<dyn crate::parsers::Parser + Send + Sync>, _) =
match &m.parser {
Parser::Regex {
labels,
value,
pattern,
} => (
Box::new(crate::parsers::regex::RegexParser::new(
pattern,
labels.to_vec(),
value.to_owned(),
)),
labels,
),
Parser::Json { labels, value } => (
Box::new(crate::parsers::json::JsonParser::new(
labels.to_vec(),
value.to_owned(),
)),
labels,
),
};

let mut pipeline_stages = Vec::new();
if let Some(stages) = &m.pipeline_stages {
pipeline_stages = stages
.iter()
.map(
|s| -> Box<dyn crate::pipeline_stages::PipelineStage + Send + Sync> {
match s {
PipelineStage::Jq { query } => {
Box::new(crate::pipeline_stages::jq::Stage {
expression: query.clone(),
})
}
PipelineStage::Regex { pattern, replace } => {
Box::new(crate::pipeline_stages::regex::Stage {
regex: regex::Regex::new(pattern).unwrap(),
replace: replace.to_string(),
})
}
}
}
},
)
.collect();
},
)
.collect();
}

let targets = m
.targets
Expand All @@ -100,7 +124,7 @@ pub fn parse(path: String) -> serde_yaml::Result<crate::DataMetrics> {
}
builder.targets(targets);
builder.pipeline_stages(pipeline_stages);
builder.parser(Box::new(parser));
builder.parser(parser);
builder.build()
})
.collect();
Expand Down
31 changes: 13 additions & 18 deletions src/parsers/json.rs
Original file line number Diff line number Diff line change
@@ -1,28 +1,26 @@
use std::collections::HashMap;

use super::ParseError;
use super::{ParseError, Parsed};

pub struct JsonParser {
labels: Vec<String>,
value: Option<String>,
}

impl super::Parser for JsonParser {
fn parse(&self, data: &str) -> Result<Vec<super::Parsed>, ParseError> {
fn parse(&self, data: &str) -> Result<Vec<Parsed>, ParseError> {
match serde_json::from_str(data)? {
serde_json::Value::Array(arr) => arr
.iter()
.map(|v| match v {
serde_json::Value::Object(obj) => self.handle_obj(obj),
_ => Err(ParseError::IncorrectType(String::from(
"exepcted object or array of objects",
))),
_ => Err(ParseError::IncorrectType(
"exepcted object or array of objects".into(),
)),
})
.collect(),
serde_json::Value::Object(obj) => Ok(vec![self.handle_obj(&obj)?]),
_ => Err(ParseError::IncorrectType(String::from(
"exepcted object or array of objects",
))),
_ => Err(ParseError::IncorrectType(
"exepcted object or array of objects".into(),
)),
}
}
}
Expand All @@ -35,28 +33,25 @@ impl JsonParser {
fn handle_obj(
&self,
obj: &serde_json::Map<String, serde_json::Value>,
) -> Result<super::Parsed, ParseError> {
let mut parsed = super::Parsed {
labels: HashMap::new(),
value: None,
};
) -> Result<Parsed, ParseError> {
let mut parsed = Parsed::new();

for label in &self.labels {
let value = obj
.get(label)
.map(|v| v.as_str())
.flatten()
.ok_or_else(|| ParseError::MissingField(String::from("expected field missing")))?;
.ok_or_else(|| ParseError::MissingField("expected field missing".into()))?;

parsed.labels.insert(label.clone(), value.to_string());
}

if let Some(key) = &self.value {
let value = obj
.get(key)
.ok_or_else(|| ParseError::MissingField(String::from("expected field missing")))?
.ok_or_else(|| ParseError::MissingField("expected field missing".into()))?
.as_f64()
.ok_or_else(|| ParseError::IncorrectType(String::from("expected a float64")))?;
.ok_or_else(|| ParseError::IncorrectType("expected a float64".into()))?;
parsed.value = Some(value);
}

Expand Down
19 changes: 18 additions & 1 deletion src/parsers/mod.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
use std::collections::HashMap;
use std::{collections::HashMap, num::ParseFloatError};

pub mod json;
pub mod regex;

/// Errors a parser can report while extracting samples from input data.
#[derive(Debug)]
pub enum ParseError {
/// Wraps an error reported by `serde_json`.
InvalidJson(serde_json::Error),
/// A value had an unexpected type; the message describes what was expected.
IncorrectType(String),
/// A configured label or value field was not found in the input.
MissingField(String),
/// Wraps a failure to parse text as a floating-point number.
ParseFloat(ParseFloatError),
}

impl From<serde_json::Error> for ParseError {
Expand All @@ -15,11 +17,26 @@ impl From<serde_json::Error> for ParseError {
}
}

impl From<ParseFloatError> for ParseError {
fn from(e: ParseFloatError) -> Self {
ParseError::ParseFloat(e)
}
}

/// A single sample produced by a parser: an optional numeric value plus its labels.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Parsed {
    /// Numeric value of the sample; stays `None` unless a parser extracts one.
    pub value: Option<f64>,
    /// Label name to label value pairs extracted from the input.
    pub labels: HashMap<String, String>,
}

impl Parsed {
    /// Creates an empty sample with no value and no labels.
    fn new() -> Self {
        Self::default()
    }
}

/// Turns a raw text payload into zero or more parsed samples.
pub trait Parser {
/// Parses `data`, returning the extracted [`Parsed`] entries or a [`ParseError`].
fn parse(&self, data: &str) -> Result<Vec<Parsed>, ParseError>;
}
92 changes: 92 additions & 0 deletions src/parsers/regex.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
use super::{ParseError, Parsed};

/// Parser that extracts labels and an optional value from named regex captures.
pub struct RegexParser {
/// Compiled pattern; every match found in the input yields one parsed entry.
regex: regex::Regex,
/// Names of the capture groups copied into each entry's labels.
labels: Vec<String>,
/// Name of the capture group parsed as the entry's `f64` value, if any.
value: Option<String>,
}

impl RegexParser {
pub fn new(pattern: &str, labels: Vec<String>, value: Option<String>) -> Self {
RegexParser {
regex: regex::Regex::new(pattern).unwrap(),
labels,
value,
}
}
}

impl super::Parser for RegexParser {
    /// Produces one [`Parsed`] entry per regex match found in `data`.
    ///
    /// For each match, every configured label is read from the capture group of
    /// the same name, and the configured value group (if any) is parsed as `f64`.
    /// The first missing capture or unparsable value aborts the whole parse with
    /// the corresponding [`ParseError`].
    fn parse(&self, data: &str) -> Result<Vec<Parsed>, ParseError> {
        self.regex
            .captures_iter(data)
            .map(|captures| -> Result<Parsed, ParseError> {
                let mut entry = Parsed::new();

                for name in &self.labels {
                    match captures.name(name) {
                        Some(found) => {
                            entry
                                .labels
                                .insert(name.clone(), found.as_str().to_string());
                        }
                        None => {
                            return Err(ParseError::MissingField(
                                "expected field missing".into(),
                            ));
                        }
                    }
                }

                if let Some(name) = &self.value {
                    let raw = match captures.name(name) {
                        Some(found) => found.as_str(),
                        None => {
                            return Err(ParseError::MissingField(
                                "expected field missing".into(),
                            ));
                        }
                    };

                    // A ParseFloatError is converted into ParseError by `?`.
                    entry.value = Some(raw.parse::<f64>()?);
                }

                Ok(entry)
            })
            // Collecting into `Result` stops at the first failing match.
            .collect()
    }
}

#[cfg(test)]
mod tests {
    use crate::parsers::Parser;
    use std::collections::HashMap;

    use super::*;

    /// Four `key=value` pairs must each yield one entry, in input order, with the
    /// key captured as a label and the digit parsed as the value.
    #[test]
    fn test_regex_parser() {
        let text = r#"a=1,b=2,c=3,d=4"#;
        let pattern = r#"(?P<key>[a-z])=(?P<val>\d)"#;

        let parser = RegexParser::new(pattern, vec!["key".to_string()], Some("val".to_string()));
        let parsed = parser.parse(text).unwrap();

        assert_eq!(parsed.len(), 4);

        let expected = [("a", 1f64), ("b", 2f64), ("c", 3f64), ("d", 4f64)];
        for (entry, (key, val)) in parsed.iter().zip(expected.iter()) {
            assert_eq!(
                entry.labels,
                HashMap::from([("key".to_string(), key.to_string())])
            );
            assert_eq!(entry.value, Some(*val));
        }
    }
}