Skip to content

Commit

Permalink
first upload
Browse files Browse the repository at this point in the history
  • Loading branch information
BENMFeng committed Nov 27, 2024
1 parent 67efd24 commit 4cc7b86
Show file tree
Hide file tree
Showing 11 changed files with 1,891 additions and 2 deletions.
38 changes: 38 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
[package]
name = "doh"

version = "0.1.0"
edition = "2021"
license = "GNU GPLv3"
repository = "https://github.com/ZimaBlue-AI/DoH"
description = "Distributed jObs Hypervisor"
rust-version = "1.82.0"
authors = ["ZimaBlueAI <zimablueai@proton.me>"]

[[bin]]
name = "node_monitor"
path = "src/node_monitor.rs"
doc = false

[[bin]]
name = "job_monitor"
path = "src/job_monitor.rs"
doc = false

[dependencies]
sysinfo = "0.32.1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0.133"
procfs = "0.17.0"
regex = "1.5.4"
tokio = { version = "1", features = ["full", "rt-multi-thread"] }
log = "0.4"
env_logger = "0.11.5"
uuid = {version = "^1.8.0", features = [
"v4", # Lets you generate random UUIDs
"fast-rng", # Use a faster (but still sufficiently random) RNG
"macro-diagnostics", # Enable better diagnostics for compile-time UUIDs
]}
uuid-macro-internal = { version = "1.0.0-alpha.1" }
anyhow = "1.0"
reqwest = { version="0.12.2", features = ["json", "multipart", "stream"] }
206 changes: 204 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,204 @@
# DoH
Distributed jObs Hypervisor
# DoH: Distributed jObs Hypervisor

## 0x01 Deployment

Install Rust
```bash
# Install rust
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
# or update
rustup update
```

Install openssl and libssl-dev
```bash
sudo apt-get update
sudo apt install openssl
sudo apt install libssl-dev
sudo apt install pkg-config
```

## 0x02 compile
```bash
git clone https://github.com/ZimaBlue-AI/DoH
cd DoH
cargo build --release
```

## 0x03 configuration

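### 3.1 Resource monitoring program configuration, saved as config.json

> Note: the `#` annotations in the samples below are explanatory only. JSON does not support comments, so remove them from the real `config.json`.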

```json
{
"disk_monitor": {
"disk_space_threshold": 107374182400, #100GB
"check_interval": 60, #1 minute
"mount_points": ["/mnt/c"],
"path_space": [
{
"path":"/mnt/c/DoH/",
"space_threshold":[10, 26214400] # warn when the size is below 10 bytes or above 25MB
}
],
"receiver": [{
"receive_id": "ou_***",
"receive_id_type": "open_id"
}]
},
"resource_monitor": {
"check_interval": 10, #10 seconds
"iops_threshold": 26214400, # read/write bytes over 25MB
"memory_threshold": 1073741824, # RAM over 1GB
"virtual_memory_threshold": 10737418240, # Virtual memory over 10GB
"cpu_threshold": 190.0, # CPU usage over 190%
"exclude_self_process": true, # don't warn about the monitor's own processes
"receiver": [{
"receive_id": "186***", # phone
"receive_id_type": "mobile"
}]
},
"notice_config": {
"fs_config": {
"app_id": "cli_***", # feishu(lark) app_id
"app_secret": "***", # feishu(lark) app_secret
"receiver": [{
"receive_id": "oc_***",
"receive_id_type": "chat_id"
}]
}
}
}
```


### 3.2 Node hypervisor configuration

```json
{
"disk_monitor": {
"disk_space_threshold": 107374182400, #100GB
"check_interval": 60, #1 minute
"mount_points": ["/mnt/c"],
"path_space": [
{
"path":"/mnt/c/DoH/",
"space_threshold":[10, 26214400] # warn when the size is below 10 bytes or above 25MB
}
],
"receiver": [{
"receive_id": "ou_***",
"receive_id_type": "open_id"
}]
},
"resource_monitor": {
"check_interval": 10, #10 seconds
"iops_threshold": 26214400, # read/write bytes over 25MB
"memory_threshold": 1073741824, # RAM over 1GB
"virtual_memory_threshold": 10737418240, # Virtual memory over 10GB
"cpu_threshold": 190.0, # CPU usage over 190%
"exclude_self_process": true, # don't warn about the monitor's own processes
"receiver": [{
"receive_id": "186***", # phone
"receive_id_type": "mobile"
}]
},
"node_monitor": {
"run_time": 10,
"check_interval": 60,
"node_id": "management_node", # node name
"exclude_users": ["root"], # do not daemon the root user
"include_users": ["ai"], # daemon user list
"receiver": [{
"receive_id": "186***",
"receive_id_type": "mobile"
}]
},
"notice_config": {
"fs_config": {
"app_id": "cli_***", # feishu(lark) app_id
"app_secret": "***", # feishu(lark) app_secret
"receiver": []
}
}
}
```

### 3.3 Job management program configuration
```json
{
"disk_monitor": {
"disk_space_threshold": 107374182400, #100GB
"check_interval": 60, #1 minute
"mount_points": ["/mnt/c"],
"path_space": [
{
"path":"/mnt/c/DoH/",
"space_threshold":[10, 26214400] # warn when the size is below 10 bytes or above 25MB
}
],
"receiver": [{
"receive_id": "ou_***",
"receive_id_type": "open_id"
}]
},
"resource_monitor": {
"check_interval": 10, #10 seconds
"iops_threshold": 26214400, # read/write bytes over 25MB
"memory_threshold": 1073741824, # RAM over 1GB
"virtual_memory_threshold": 10737418240, # Virtual memory over 10GB
"cpu_threshold": 190.0, # CPU usage over 190%
"exclude_self_process": true, # don't warn about the monitor's own processes
"receiver": [{
"receive_id": "186***", # phone
"receive_id_type": "mobile"
}]
},
"job_monitor": {
"check_interval": 10,
"script_path": "/mnt/c/DoH/test/test.sh", # run job shell script
"receiver": [{
"receive_id": "ou_***",
"receive_id_type": "open_id"
}]
},
"notice_config": {
"fs_config": {
"app_id": "cli_***", # feishu(lark) app_id
"app_secret": "***", # feishu(lark) app_secret
"receiver": [{
"receive_id": "oc_***",
"receive_id_type": "chat_id"
}]
}
}
}
```

## 0x04 Run
### 4.1 Resource monitoring
```bash
RUST_LOG=INFO ./target/release/doh
```

### 4.2 Node monitoring
```bash
RUST_LOG=INFO ./target/release/node_monitor
```

### 4.3 Job monitoring
```bash
RUST_LOG=INFO ./target/release/job_monitor
```

## 0x05 TODO

- [ ] Add support for other IM (WeCom, DingTalk, Slack, Discord, etc.)
- [ ] Increase resource use assessment report
- [ ] ReAct according to constraint policy (Response & Action)
- [ ] Remote control and web interaction
- [ ] Increase artificial intelligence management

## 0x06 License

Licensed under [GNU General Public License v3.0 (GPL-3.0)](https://www.gnu.org/licenses/gpl-3.0.html).

---
Copyright (c) 2024 ZimaBlueAI Tech. Co. Ltd.
58 changes: 58 additions & 0 deletions config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
"disk_monitor": {
"disk_space_threshold": 107374182400,
"check_interval": 60,
"mount_points": ["/mnt/c"],
"path_space": [
{
"path":"/mnt/c/DoH/",
"space_threshold":[10, 26214400]
}
],
"receiver": [{
"receive_id": "ou_***",
"receive_id_type": "open_id"
}]
},
"resource_monitor": {
"check_interval": 10,
"iops_threshold": 26214400,
"memory_threshold": 1073741824,
"virtual_memory_threshold": 10737418240,
"cpu_threshold": 190.0,
"exclude_self_process": true,
"receiver": [{
"receive_id": "186***",
"receive_id_type": "mobile"
}]
},
"node_monitor": {
"run_time": 10,
"check_interval": 60,
"node_id": "management_node",
"exclude_users": ["root"],
"include_users": ["ai"],
"receiver": [{
"receive_id": "186***",
"receive_id_type": "mobile"
}]
},
"job_monitor": {
"check_interval": 10,
"script_path": "/mnt/c/DoH/test/test.sh",
"receiver": [{
"receive_id": "ou_***",
"receive_id_type": "open_id"
}]
},
"notice_config": {
"fs_config": {
"app_id": "cli_***",
"app_secret": "***",
"receiver": [{
"receive_id": "oc_***",
"receive_id_type": "chat_id"
}]
}
}
}
90 changes: 90 additions & 0 deletions src/disk_monitor.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// use serde::Deserialize;
use std::fs;
use std::thread;
use std::time::Duration;
use sysinfo::Disks;
use log::warn;

use doh::{
Config,
read_config,
format_bytes,
get_dir_size,
notify_msg
};

/// Spawns a detached background task that periodically checks disk usage and
/// sends alerts.
///
/// The configuration is loaded from `config_path` with `read_config`. Every
/// `disk_monitor.check_interval` seconds the task:
///
/// 1. Warns about each disk whose available space is below
///    `disk_space_threshold`. Only mount points listed in `mount_points` are
///    considered; an empty list means every disk is checked.
/// 2. Warns about each entry of `path_space` whose size (directory size via
///    `get_dir_size`, or file length) is below `space_threshold.0` or above
///    `space_threshold.1`. Paths whose metadata cannot be read are skipped.
///
/// Every warning is logged with `warn!`, and each non-empty batch is forwarded
/// through `notify_msg` to the `disk_monitor` receivers. Notification failures
/// are deliberately ignored (best effort) so that monitoring keeps running.
///
/// This function returns as soon as the task has been spawned.
pub async fn start_disk_monitor(config_path: &str) {
    let config: Config = read_config(config_path);
    let disk_config = config.disk_monitor;

    tokio::spawn(async move {
        loop {
            let mut msg_content = String::new();

            // --- Free-space check -------------------------------------------------
            // Scoped so the disk list is dropped before the notification is awaited.
            {
                let disks = Disks::new_with_refreshed_list();
                for disk in disks.list() {
                    let mount_point = disk
                        .mount_point()
                        .to_str()
                        .unwrap_or("Unknown")
                        .to_string();
                    let available_space = disk.available_space();

                    let is_watched = disk_config.mount_points.is_empty()
                        || disk_config.mount_points.contains(&mount_point);
                    if is_watched && available_space < disk_config.disk_space_threshold {
                        let line = format!(
                            "Warning: Disk space is below threshold on disk Filesystem {:?} Mounted on {:?}: {} available",
                            disk.name().to_str().unwrap_or("Unknown"),
                            mount_point,
                            format_bytes(available_space)
                        );
                        warn!("{}", line);
                        msg_content.push_str(&line);
                        msg_content.push('\n');
                    }
                    // Add SMART information and disk damage alarm here
                }
            }
            if !msg_content.is_empty() {
                // The future must be awaited: an un-awaited future is never polled,
                // so the notification would silently never be sent.
                let _ = notify_msg(&config.notice_config, &disk_config.receiver, &msg_content).await;
                msg_content.clear();
            }

            // --- File / directory size check --------------------------------------
            for path_space in &disk_config.path_space {
                let path = &path_space.path;
                let space_threshold = path_space.space_threshold;

                // Query the metadata once; a missing or unreadable path is skipped
                // instead of panicking if it vanishes between two lookups.
                let Ok(metadata) = fs::metadata(path) else {
                    continue;
                };
                let (kind, size) = if metadata.is_dir() {
                    ("Directory", get_dir_size(path))
                } else {
                    ("File", metadata.len())
                };

                if size > space_threshold.1 || size < space_threshold.0 {
                    let line = format!(
                        "Warning: {} size exceeds threshold in path {:?}: {}",
                        kind,
                        path,
                        format_bytes(size)
                    );
                    warn!("{}", line);
                    msg_content.push_str(&line);
                    msg_content.push('\n');
                }
            }
            if !msg_content.is_empty() {
                let _ = notify_msg(&config.notice_config, &disk_config.receiver, &msg_content).await;
                msg_content.clear();
            }

            // Async sleep: `std::thread::sleep` would block a runtime worker thread
            // and stall every other task scheduled on it.
            tokio::time::sleep(Duration::from_secs(disk_config.check_interval)).await;
        }
    });
}
Loading

0 comments on commit 4cc7b86

Please sign in to comment.