forked from ZimaBlue-AI/DoH
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
1,891 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
[package] | ||
name = "doh" | ||
|
||
version = "0.1.0" | ||
edition = "2021" | ||
license = "GPL-3.0-only" | ||
repository = "https://github.com/ZimaBlue-AI/DoH" | ||
description = "Distributed jObs Hypervisor" | ||
rust-version = "1.82.0" | ||
authors = ["ZimaBlueAI <zimablueai@proton.me>"] | ||
|
||
[[bin]] | ||
name = "node_monitor" | ||
path = "src/node_monitor.rs" | ||
doc = false | ||
|
||
[[bin]] | ||
name = "job_monitor" | ||
path = "src/job_monitor.rs" | ||
doc = false | ||
|
||
[dependencies] | ||
sysinfo = "0.32.1" | ||
serde = { version = "1.0", features = ["derive"] } | ||
serde_json = "1.0.133" | ||
procfs = "0.17.0" | ||
regex = "1.5.4" | ||
tokio = { version = "1", features = ["full", "rt-multi-thread"] } | ||
log = "0.4" | ||
env_logger = "0.11.5" | ||
uuid = {version = "^1.8.0", features = [ | ||
"v4", # Lets you generate random UUIDs | ||
"fast-rng", # Use a faster (but still sufficiently random) RNG | ||
"macro-diagnostics", # Enable better diagnostics for compile-time UUIDs | ||
]} | ||
uuid-macro-internal = { version = "1.0.0-alpha.1" } | ||
anyhow = "1.0" | ||
reqwest = { version="0.12.2", features = ["json", "multipart", "stream"] } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,204 @@ | ||
# DoH | ||
Distributed jObs Hypervisor | ||
# DoH: Distributed jObs Hypervisor | ||
|
||
## 0x01 Deployment | ||
|
||
Install Rust | ||
```bash | ||
# Install rust | ||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh | ||
# or update | ||
rustup update | ||
# Install openssl and libssl-dev | ||
sudo apt-get update | ||
sudo apt install openssl | ||
sudo apt install libssl-dev | ||
sudo apt install pkg-config | ||
``` | ||
|
||
## 0x02 Compile | ||
```bash | ||
git clone https://github.com/ZimaBlue-AI/DoH | ||
cd DoH | ||
cargo build --release | ||
``` | ||
|
||
## 0x03 Configuration | ||
|
||
### 3.1 Resource monitoring program configuration, saved as config.json | ||
|
||
```json | ||
{ | ||
"disk_monitor": { | ||
"disk_space_threshold": 107374182400, #100GB | ||
"check_interval": 60, #1 minute | ||
"mount_points": ["/mnt/c"], | ||
"path_space": [ | ||
{ | ||
"path":"/mnt/c/DoH/", | ||
"space_threshold":[10, 26214400] # lower than 10bytes, or larger than 25MB will give out warning | ||
} | ||
], | ||
"receiver": [{ | ||
"receive_id": "ou_***", | ||
"receive_id_type": "open_id" | ||
}] | ||
}, | ||
"resource_monitor": { | ||
"check_interval": 10, #10 seconds | ||
"iops_threshold": 26214400, # read/write bytes over 25MB | ||
"memory_threshold": 1073741824, # RAM over 1GB | ||
"virtual_memory_threshold": 10737418240, # Virtual memory over 10GB | ||
"cpu_threshold": 190.0, # CPU usage over 190% | ||
"exclude_self_process": true, # don't warn about the monitor's own processes | ||
"receiver": [{ | ||
"receive_id": "186***", # phone | ||
"receive_id_type": "mobile" | ||
}] | ||
}, | ||
"notice_config": { | ||
"fs_config": { | ||
"app_id": "cli_***", # feishu(lark) app_id | ||
"app_secret": "***", # feishu(lark) app_secret | ||
"receiver": [{ | ||
"receive_id": "oc_***", | ||
"receive_id_type": "chat_id" | ||
}] | ||
} | ||
} | ||
} | ||
``` | ||
|
||
|
||
### 3.2 Node hypervisor configuration | ||
|
||
```json | ||
{ | ||
"disk_monitor": { | ||
"disk_space_threshold": 107374182400, #100GB | ||
"check_interval": 60, #1 minute | ||
"mount_points": ["/mnt/c"], | ||
"path_space": [ | ||
{ | ||
"path":"/mnt/c/DoH/", | ||
"space_threshold":[10, 26214400] # lower than 10bytes, or larger than 25MB will give out warning | ||
} | ||
], | ||
"receiver": [{ | ||
"receive_id": "ou_***", | ||
"receive_id_type": "open_id" | ||
}] | ||
}, | ||
"resource_monitor": { | ||
"check_interval": 10, #10 seconds | ||
"iops_threshold": 26214400, # read/write bytes over 25MB | ||
"memory_threshold": 1073741824, # RAM over 1GB | ||
"virtual_memory_threshold": 10737418240, # Virtual memory over 10GB | ||
"cpu_threshold": 190.0, # CPU usage over 190% | ||
"exclude_self_process": true, # don't warn about the monitor's own processes | ||
"receiver": [{ | ||
"receive_id": "186***", # phone | ||
"receive_id_type": "mobile" | ||
}] | ||
}, | ||
"node_monitor": { | ||
"run_time": 10, | ||
"check_interval": 60, | ||
"node_id": "management_node", # node name | ||
"exclude_users": ["root"], # do not daemon the root user | ||
"include_users": ["ai"], # daemon user list | ||
"receiver": [{ | ||
"receive_id": "186***", | ||
"receive_id_type": "mobile" | ||
}] | ||
}, | ||
"notice_config": { | ||
"fs_config": { | ||
"app_id": "cli_***", # feishu(lark) app_id | ||
"app_secret": "***", # feishu(lark) app_secret | ||
"receiver": [] | ||
} | ||
} | ||
} | ||
``` | ||
|
||
### 3.3 Job management program configuration | ||
```json | ||
{ | ||
"disk_monitor": { | ||
"disk_space_threshold": 107374182400, #100GB | ||
"check_interval": 60, #1 minute | ||
"mount_points": ["/mnt/c"], | ||
"path_space": [ | ||
{ | ||
"path":"/mnt/c/DoH/", | ||
"space_threshold":[10, 26214400] # lower than 10bytes, or larger than 25MB will give out warning | ||
} | ||
], | ||
"receiver": [{ | ||
"receive_id": "ou_***", | ||
"receive_id_type": "open_id" | ||
}] | ||
}, | ||
"resource_monitor": { | ||
"check_interval": 10, #10 seconds | ||
"iops_threshold": 26214400, # read/write bytes over 25MB | ||
"memory_threshold": 1073741824, # RAM over 1GB | ||
"virtual_memory_threshold": 10737418240, # Virtual memory over 10GB | ||
"cpu_threshold": 190.0, # CPU usage over 190% | ||
"exclude_self_process": true, # don't warn about the monitor's own processes | ||
"receiver": [{ | ||
"receive_id": "186***", # phone | ||
"receive_id_type": "mobile" | ||
}] | ||
}, | ||
"job_monitor": { | ||
"check_interval": 10, | ||
"script_path": "/mnt/c/DoH/test/test.sh", # run job shell script | ||
"receiver": [{ | ||
"receive_id": "ou_***", | ||
"receive_id_type": "open_id" | ||
}] | ||
}, | ||
"notice_config": { | ||
"fs_config": { | ||
"app_id": "cli_***", # feishu(lark) app_id | ||
"app_secret": "***", # feishu(lark) app_secret | ||
"receiver": [{ | ||
"receive_id": "oc_***", | ||
"receive_id_type": "chat_id" | ||
}] | ||
} | ||
} | ||
} | ||
``` | ||
|
||
## 0x04 Run | ||
### 4.1 Resource monitoring | ||
```bash | ||
RUST_LOG=INFO ./target/release/doh | ||
``` | ||
|
||
### 4.2 Node monitoring | ||
```bash | ||
RUST_LOG=INFO ./target/release/node_monitor | ||
``` | ||
|
||
### 4.3 Job monitoring | ||
```bash | ||
RUST_LOG=INFO ./target/release/job_monitor | ||
``` | ||
|
||
## 0x05 TODO | ||
|
||
- [ ] Add support for other IM (WeCom, DingTalk, Slack, Discord, etc.) | ||
- [ ] Add resource usage assessment reports | ||
- [ ] ReAct according to constraint policy (Response & Action) | ||
- [ ] Remote control and web interaction | ||
- [ ] Add artificial intelligence management | ||
|
||
## 0x06 License | ||
|
||
Licensed under [GNU General Public License v3.0 (GPL-3.0)](https://www.gnu.org/licenses/gpl-3.0.html). | ||
|
||
--- | ||
Copyright (c) 2024 ZimaBlueAI Tech. Co. Ltd. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
{ | ||
"disk_monitor": { | ||
"disk_space_threshold": 107374182400, | ||
"check_interval": 60, | ||
"mount_points": ["/mnt/c"], | ||
"path_space": [ | ||
{ | ||
"path":"/mnt/c/DoH/", | ||
"space_threshold":[10, 26214400] | ||
} | ||
], | ||
"receiver": [{ | ||
"receive_id": "ou_***", | ||
"receive_id_type": "open_id" | ||
}] | ||
}, | ||
"resource_monitor": { | ||
"check_interval": 10, | ||
"iops_threshold": 26214400, | ||
"memory_threshold": 1073741824, | ||
"virtual_memory_threshold": 10737418240, | ||
"cpu_threshold": 190.0, | ||
"exclude_self_process": true, | ||
"receiver": [{ | ||
"receive_id": "186***", | ||
"receive_id_type": "mobile" | ||
}] | ||
}, | ||
"node_monitor": { | ||
"run_time": 10, | ||
"check_interval": 60, | ||
"node_id": "management_node", | ||
"exclude_users": ["root"], | ||
"include_users": ["ai"], | ||
"receiver": [{ | ||
"receive_id": "186***", | ||
"receive_id_type": "mobile" | ||
}] | ||
}, | ||
"job_monitor": { | ||
"check_interval": 10, | ||
"script_path": "/mnt/c/DoH/test/test.sh", | ||
"receiver": [{ | ||
"receive_id": "ou_***", | ||
"receive_id_type": "open_id" | ||
}] | ||
}, | ||
"notice_config": { | ||
"fs_config": { | ||
"app_id": "cli_***", | ||
"app_secret": "***", | ||
"receiver": [{ | ||
"receive_id": "oc_***", | ||
"receive_id_type": "chat_id" | ||
}] | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
// use serde::Deserialize; | ||
use std::fs; | ||
use std::thread; | ||
use std::time::Duration; | ||
use sysinfo::Disks; | ||
use log::warn; | ||
|
||
use doh::{ | ||
Config, | ||
read_config, | ||
format_bytes, | ||
get_dir_size, | ||
notify_msg | ||
}; | ||
|
||
pub async fn start_disk_monitor(config_path: &str) { | ||
let config: Config = read_config(config_path); | ||
let disk_config = config.disk_monitor; | ||
|
||
tokio::spawn(async move { | ||
// let mut sys = System::new_all(); | ||
loop { | ||
let disks = Disks::new_with_refreshed_list(); | ||
let mut msg_content = String::new(); | ||
for disk in disks.list() { | ||
let mount_point = disk.mount_point().to_str().unwrap_or("Unknown").to_string(); | ||
let available_space = disk.available_space(); | ||
if (disk_config.mount_points.is_empty() || disk_config.mount_points.contains(&mount_point)) && available_space < disk_config.disk_space_threshold { | ||
// println!("Warning: Disk space is below threshold: {} B available", available_space); | ||
warn!( | ||
"Warning: Disk space is below threshold on disk Filesystem {:?} Mounted on {:?}: {} available", | ||
disk.name().to_str().unwrap_or("Unknown"), | ||
mount_point, | ||
format_bytes(available_space) | ||
); | ||
msg_content.push_str(&format!("Warning: Disk space is below threshold on disk Filesystem {:?} Mounted on {:?}: {} available\n", | ||
disk.name().to_str().unwrap_or("Unknown"), | ||
mount_point, | ||
format_bytes(available_space) | ||
)); | ||
} | ||
// Add SMART information and disk damage alarm here | ||
} | ||
if msg_content.len() > 0 { | ||
let _ = notify_msg(&config.notice_config, &disk_config.receiver, &msg_content); | ||
msg_content.clear(); | ||
} | ||
|
||
// Check file increase/decrease data size | ||
for path_space in &disk_config.path_space { | ||
let path = &path_space.path; | ||
let space_threshold = path_space.space_threshold; | ||
if fs::metadata(path).is_ok() { | ||
let metadata = fs::metadata(path).unwrap(); | ||
if metadata.is_dir() { | ||
let total_size = get_dir_size(path); | ||
if total_size > space_threshold.1 || total_size < space_threshold.0 { | ||
warn!( | ||
"Warning: Directory size exceeds threshold in path {:?}: {}", | ||
path, | ||
format_bytes(total_size) | ||
); | ||
msg_content.push_str(&format!("Warning: Directory size exceeds threshold in path {:?}: {}\n", | ||
path, | ||
format_bytes(total_size) | ||
)); | ||
} | ||
} else { | ||
if metadata.len() > space_threshold.1 || metadata.len() < space_threshold.0 { | ||
warn!( | ||
"Warning: File size exceeds threshold in path {:?}: {}", | ||
path, | ||
format_bytes(metadata.len()) | ||
); | ||
msg_content.push_str(&format!("Warning: File size exceeds threshold in path {:?}: {}\n", | ||
path, | ||
format_bytes(metadata.len()) | ||
)); | ||
} | ||
} | ||
} | ||
} | ||
if msg_content.len() > 0 { | ||
let _ = notify_msg(&config.notice_config, &disk_config.receiver, &msg_content).await; | ||
msg_content.clear(); | ||
} | ||
thread::sleep(Duration::from_secs(disk_config.check_interval)); | ||
} | ||
}); | ||
} |
Oops, something went wrong.