|
| 1 | +// Copyright (C) 2023 Intel Corporation |
| 2 | +// |
| 3 | +// SPDX-License-Identifier: MIT |
| 4 | + |
| 5 | +use crate::{ |
| 6 | + page_mapper::{JsonPageMapper, ParsedJsonSection}, |
| 7 | + utils::read_skipping_ws, |
| 8 | +}; |
| 9 | +use pyo3::{prelude::*, types::PyDict}; |
| 10 | +use std::{ |
| 11 | + collections::HashMap, |
| 12 | + fs::File, |
| 13 | + io::{self, BufReader, Read, Seek}, |
| 14 | + path::Path, |
| 15 | +}; |
| 16 | + |
/// Location of one JSON value inside the underlying byte stream.
///
/// Instances are produced by [`ParsedJsonSection::parse`]; `offset` and `size`
/// together delimit the raw bytes of the value that follows `key`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct JsonSection {
    /// Key the value was stored under (passed to `parse` as `buf_key`).
    key: String,
    /// Byte offset, from the start of the stream, of the first byte of the value.
    offset: usize,
    /// Number of bytes the value occupies, starting at `offset`.
    size: usize,
}
| 23 | + |
| 24 | +fn handle_arr_or_dict( |
| 25 | + mut stack: Vec<u8>, |
| 26 | + mut reader: impl Read + Seek, |
| 27 | + mut last_token: u8, |
| 28 | +) -> Result<(), io::Error> { |
| 29 | + while stack.len() != 0 { |
| 30 | + match read_skipping_ws(&mut reader) { |
| 31 | + Ok(c) => match c { |
| 32 | + b'{' | b'[' => { |
| 33 | + stack.push(c); |
| 34 | + last_token = c; |
| 35 | + } |
| 36 | + b'}' => { |
| 37 | + if last_token != b'{' { |
| 38 | + let cur_pos = reader.stream_position()?; |
| 39 | + let msg = format!("Last token in the stack is '{}', but the given token at offset={} is '}}'", last_token as char, cur_pos); |
| 40 | + return Err(io::Error::new(io::ErrorKind::InvalidData, msg)); |
| 41 | + } |
| 42 | + stack.pop(); |
| 43 | + if stack.len() != 0 { |
| 44 | + last_token = *stack |
| 45 | + .last() |
| 46 | + .ok_or(io::Error::new(io::ErrorKind::InvalidData, "stack is empty"))?; |
| 47 | + } |
| 48 | + } |
| 49 | + b']' => { |
| 50 | + if last_token != b'[' { |
| 51 | + let cur_pos = reader.stream_position()?; |
| 52 | + let msg = format!("Last token in the stack is '{}', but the given token at offset={} is ']'", last_token as char, cur_pos); |
| 53 | + return Err(io::Error::new(io::ErrorKind::InvalidData, msg)); |
| 54 | + } |
| 55 | + stack.pop(); |
| 56 | + if stack.len() != 0 { |
| 57 | + last_token = *stack |
| 58 | + .last() |
| 59 | + .ok_or(io::Error::new(io::ErrorKind::InvalidData, "stack is empty"))?; |
| 60 | + } |
| 61 | + } |
| 62 | + b'"' => { |
| 63 | + while let Ok(c) = read_skipping_ws(&mut reader) { |
| 64 | + if c == b'"' { |
| 65 | + break; |
| 66 | + } |
| 67 | + } |
| 68 | + } |
| 69 | + _ => {} |
| 70 | + }, |
| 71 | + Err(err) => { |
| 72 | + return Err(err); |
| 73 | + } |
| 74 | + } |
| 75 | + } |
| 76 | + Ok(()) |
| 77 | +} |
| 78 | + |
| 79 | +fn handle_string(mut reader: impl Read + Seek) -> Result<(), io::Error> { |
| 80 | + while let Ok(c) = read_skipping_ws(&mut reader) { |
| 81 | + if c == b'"' { |
| 82 | + break; |
| 83 | + } |
| 84 | + } |
| 85 | + Ok(()) |
| 86 | +} |
| 87 | + |
| 88 | +fn get_offset(mut reader: impl Read + Seek, stack: &mut Vec<u8>) -> Result<usize, io::Error> { |
| 89 | + let mut offset = usize::MAX; |
| 90 | + while let Ok(c) = read_skipping_ws(&mut reader) { |
| 91 | + stack.push(c); |
| 92 | + match c { |
| 93 | + b'{' | b'[' | b'"' => { |
| 94 | + return Ok(reader.stream_position()? as usize - 1); |
| 95 | + } |
| 96 | + b',' => { |
| 97 | + return Ok(offset - 1); |
| 98 | + } |
| 99 | + _ => { |
| 100 | + let pos = reader.stream_position()? as usize; |
| 101 | + offset = std::cmp::min(pos, offset); |
| 102 | + } |
| 103 | + } |
| 104 | + } |
| 105 | + Err(io::Error::new( |
| 106 | + io::ErrorKind::InvalidData, |
| 107 | + "Cannot get offset", |
| 108 | + )) |
| 109 | +} |
| 110 | + |
| 111 | +impl ParsedJsonSection for JsonSection { |
| 112 | + fn parse(buf_key: String, mut reader: impl Read + Seek) -> Result<Box<JsonSection>, io::Error> { |
| 113 | + // Move reader's cursor right after ':' |
| 114 | + while let Ok(c) = read_skipping_ws(&mut reader) { |
| 115 | + if c == b':' { |
| 116 | + break; |
| 117 | + } |
| 118 | + } |
| 119 | + |
| 120 | + let mut stack = vec![]; |
| 121 | + |
| 122 | + let start_offset = get_offset(&mut reader, &mut stack)?; |
| 123 | + |
| 124 | + let last_token = *stack |
| 125 | + .last() |
| 126 | + .ok_or(io::Error::new(io::ErrorKind::InvalidData, "stack is empty"))?; |
| 127 | + |
| 128 | + let end_offset = match last_token { |
| 129 | + b'[' | b'{' => { |
| 130 | + let _ = handle_arr_or_dict(stack, &mut reader, last_token)?; |
| 131 | + Ok(reader.stream_position()? as usize) |
| 132 | + } |
| 133 | + b'"' => { |
| 134 | + let _ = handle_string(&mut reader)?; |
| 135 | + Ok(reader.stream_position()? as usize) |
| 136 | + } |
| 137 | + b',' => Ok(reader.stream_position()? as usize - 1), |
| 138 | + _ => Err(io::Error::new(io::ErrorKind::InvalidData, "s")), |
| 139 | + }?; |
| 140 | + |
| 141 | + let size = end_offset - start_offset; |
| 142 | + |
| 143 | + Ok(Box::new(JsonSection { |
| 144 | + key: buf_key, |
| 145 | + offset: start_offset, |
| 146 | + size: size, |
| 147 | + })) |
| 148 | + } |
| 149 | +} |
| 150 | + |
| 151 | +#[derive(Debug)] |
| 152 | +pub struct JsonSectionPageMapperImpl { |
| 153 | + sections: Vec<Box<JsonSection>>, |
| 154 | +} |
| 155 | + |
| 156 | +impl JsonPageMapper<JsonSection> for JsonSectionPageMapperImpl {} |
| 157 | + |
| 158 | +impl JsonSectionPageMapperImpl { |
| 159 | + pub fn new(mut reader: impl Read + Seek) -> Result<Self, io::Error> { |
| 160 | + let sections = Self::parse_json(&mut reader)?; |
| 161 | + |
| 162 | + Ok(JsonSectionPageMapperImpl { sections: sections }) |
| 163 | + } |
| 164 | +} |
| 165 | + |
| 166 | +#[pyclass] |
| 167 | +pub struct JsonSectionPageMapper { |
| 168 | + reader: BufReader<File>, |
| 169 | + mapper: JsonSectionPageMapperImpl, |
| 170 | +} |
| 171 | + |
| 172 | +#[pymethods] |
| 173 | +impl JsonSectionPageMapper { |
| 174 | + #[new] |
| 175 | + fn py_new(path: String) -> PyResult<Self> { |
| 176 | + let file = File::open(Path::new(&path))?; |
| 177 | + let mut reader = BufReader::new(file); |
| 178 | + let mapper = JsonSectionPageMapperImpl::new(&mut reader)?; |
| 179 | + |
| 180 | + Ok(JsonSectionPageMapper { reader, mapper }) |
| 181 | + } |
| 182 | + |
| 183 | + fn sections(self_: PyRef<Self>) -> PyResult<PyObject> { |
| 184 | + let dict: HashMap<&str, HashMap<&str, usize>> = self_ |
| 185 | + .mapper |
| 186 | + .sections |
| 187 | + .iter() |
| 188 | + .map(|section| { |
| 189 | + let nested_dict: HashMap<&str, usize> = |
| 190 | + HashMap::from_iter([("offset", section.offset), ("size", section.size)]); |
| 191 | + (section.key.as_str(), nested_dict) |
| 192 | + }) |
| 193 | + .collect(); |
| 194 | + |
| 195 | + Ok(dict.into_py(self_.py())) |
| 196 | + } |
| 197 | + |
| 198 | + fn __len__(&self) -> PyResult<usize> { |
| 199 | + Ok(self.mapper.sections.len()) |
| 200 | + } |
| 201 | +} |
| 202 | + |
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_helpers::prepare_reader;

    /// Every section reported by the mapper must delimit a byte range that is,
    /// on its own, a valid JSON document.
    #[test]
    fn test_instance() {
        const EXAMPLE: &str = r#"{"dm_format_version": "1.0", "media_type": 2, "infos": {"string": "test", "int": 0, "float": 0.0, "string_list": ["test0", "test1", "test2"], "int_list": [0, 1, 2], "float_list": [0.0, 0.1, 0.2]}, "categories": {"label": {"labels": [{"name": "cat0", "parent": "", "attributes": ["x", "y"]}, {"name": "cat1", "parent": "", "attributes": ["x", "y"]}, {"name": "cat2", "parent": "", "attributes": ["x", "y"]}, {"name": "cat3", "parent": "", "attributes": ["x", "y"]}, {"name": "cat4", "parent": "", "attributes": ["x", "y"]}], "label_groups": [], "attributes": ["a", "b", "score"]}, "mask": {"colormap": [{"label_id": 0, "r": 0, "g": 0, "b": 0}, {"label_id": 1, "r": 128, "g": 0, "b": 0}, {"label_id": 2, "r": 0, "g": 128, "b": 0}, {"label_id": 3, "r": 128, "g": 128, "b": 0}, {"label_id": 4, "r": 0, "g": 0, "b": 128}]}, "points": {"items": [{"label_id": 0, "labels": ["cat1", "cat2"], "joints": [[0, 1]]}, {"label_id": 1, "labels": ["cat1", "cat2"], "joints": [[0, 1]]}, {"label_id": 2, "labels": ["cat1", "cat2"], "joints": [[0, 1]]}, {"label_id": 3, "labels": ["cat1", "cat2"], "joints": [[0, 1]]}, {"label_id": 4, "labels": ["cat1", "cat2"], "joints": [[0, 1]]}]}}, "items": [{"id": "42", "annotations": [{"id": 900100087038, "type": "mask", "attributes": {}, "group": 900100087038, "label_id": null, "rle": {"counts": "06", "size": [2, 3]}, "z_order": 0}, {"id": 900100087038, "type": "mask", "attributes": {}, "group": 900100087038, "label_id": null, "rle": {"counts": "06", "size": [2, 3]}, "z_order": 0}], "image": {"path": "42.jpg", "size": [10, 6]}}, {"id": "43", "annotations": [], "image": {"path": "43.qq", "size": [2, 4]}}]}
        "#;

        // Bound to a named `_tempfile` (not `_`) so the value lives until the end
        // of the test while the reader is in use.
        let (_tempfile, mut reader) = prepare_reader(EXAMPLE);
        let json_section_page_mapper = JsonSectionPageMapperImpl::new(&mut reader).unwrap();

        println!("{:?}", json_section_page_mapper);

        for section in json_section_page_mapper.sections {
            let offset = section.offset;
            let size = section.size;
            reader
                .seek(io::SeekFrom::Start(offset as u64))
                .expect("Cannot seek to section offset");

            // `read` may legally return fewer bytes than requested; `read_exact`
            // guarantees the whole section is loaded or the test fails loudly.
            let mut buf = vec![0; size];
            reader
                .read_exact(buf.as_mut_slice())
                .expect("Cannot read the whole section");

            let content: serde_json::Value = serde_json::from_str(
                std::str::from_utf8(buf.as_slice()).expect("Cannot change to utf8"),
            )
            .unwrap();
            println!("Section: {}, Content: {:?}", section.key, content);
        }
    }
}
0 commit comments