Skip to content
This repository has been archived by the owner on Oct 10, 2023. It is now read-only.

Commit

Permalink
完善错误处理
Browse files Browse the repository at this point in the history
  • Loading branch information
Aloxaf committed May 18, 2019
1 parent 4e574bf commit 808aa52
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 69 deletions.
46 changes: 46 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# exloli

从 E 站下载指定关键词的画(ben)廊(zi)并上传到 Telegraph 并发布到 Telegram 频道

## 用法

1. 创建 Telegram Channel, 并设为公开 (私有 Channel 需要手动获取 chat id)
2. 创建 Telegram Bot, 记录 TOKEN, 并拉进 Channel
3. 创建 Telegraph 账号, 记录 TOKEN. 创建方法: 访问 `https://api.telegra.ph/createAccount?short_name={}&author_name={}&author_url={}`
4. 在当前目录下建立 config.toml, 然后运行 exloli

模板如下

```toml
# 日志等级, 可选 INFO, DEBUG, ERROR
log_level = "INFO"
# 抓取线程
threads_num = "4"

[exhentai]
# E 站用户名
username = "username"
# E 站密码
password = "password"
# 搜索关键词
keyword = "female:lolicon language:Chinese"

[telegraph]
# telegraph 账号 token
access_token = "TOKEN"
# 作者名称
author_name = "exloli"
# 作者地址(通常为频道链接)
author_url = "https://t.me/exlolicon"

[telegram]
# telegram 频道 ID, 公共频道直接 @+频道名, 私有频道需要获取数字格式的 id
channel_id = "@exlolicon"
# 机器人 token
token = "TOKEN"

```

第一次启动将会默认从前两天的画廊开始抓取,
抓取完一本本子后将会在当前目录下生成 LAST_TIME 文件,
下次抓取时会一直抓取到 LAST_TIME 文件中记录的时间为止
2 changes: 2 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ use std::{fs::File, io::Read, path::Path};

#[derive(Debug, Deserialize)]
pub struct Config {
pub log_level: Option<String>,
pub threads_num: Option<String>,
pub exhentai: ExHentai,
pub telegraph: Telegraph,
pub telegram: Telegram,
Expand Down
56 changes: 21 additions & 35 deletions src/exhentai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,41 +7,25 @@ use reqwest::{
Client, ClientBuilder, StatusCode,
};

lazy_static! {
static ref HEADERS: HeaderMap = {
macro_rules! set_header {
($($k:ident => $v:expr), *) => {{
let mut headers = HeaderMap::new();
headers.insert(
header::ACCEPT,
HeaderValue::from_static(
"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
),
);
headers.insert(
header::ACCEPT_ENCODING,
HeaderValue::from_static("gzip, deflate, br"),
);
headers.insert(
header::ACCEPT_LANGUAGE,
HeaderValue::from_static("zh-CN,en-US;q=0.7,en;q=0.3"),
);
headers.insert(header::CACHE_CONTROL, HeaderValue::from_static("max-age=0"));
headers.insert(header::DNT, HeaderValue::from_static("1"));
headers.insert(header::HOST, HeaderValue::from_static("exhentai.org"));
headers.insert(
header::REFERER,
HeaderValue::from_static("https://exhentai.org/"),
);
headers.insert(
header::UPGRADE_INSECURE_REQUESTS,
HeaderValue::from_static("1"),
);
headers.insert(
header::USER_AGENT,
HeaderValue::from_static(
"Mozilla/5.0 (X11; Linux x86_64; rv:66.0) Gecko/20100101 Firefox/66.0",
),
);
$(headers.insert(header::$k, HeaderValue::from_static($v));)*
headers
}};
}

lazy_static! {
static ref HEADERS: HeaderMap = set_header!{
ACCEPT => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
ACCEPT_ENCODING => "gzip, deflate, br",
ACCEPT_LANGUAGE => "zh-CN,en-US;q=0.7,en;q=0.3",
CACHE_CONTROL => "max-age=0",
DNT => "1",
HOST => "exhentai.org",
REFERER => "https://exhentai.org/",
UPGRADE_INSECURE_REQUESTS => "1",
USER_AGENT => "Mozilla/5.0 (X11; Linux x86_64; rv:66.0) Gecko/20100101 Firefox/66.0"
};
}

Expand Down Expand Up @@ -174,8 +158,7 @@ impl ExHentai {
.into_text()
.unwrap()[0]
.split(' ')
.skip(1)
.next()
.nth(1)
.unwrap()
.to_owned();
debug!("评分: {}", rating);
Expand All @@ -197,6 +180,7 @@ impl ExHentai {
.xpath(r#"//table[@class="ptt"]//td[last()]/a/@href"#)?
.into_text()
{
debug!("下一页: {:?}", next_page);
let mut response = self.client.get(&next_page.swap_remove(0)).send()?;
html = parse_html(response.text()?)?;
img_pages.extend(
Expand All @@ -210,7 +194,9 @@ impl ExHentai {

/// 根据图片页面的 URL 获取图片的真实地址
pub fn get_image_url(&self, url: &str) -> Result<String, Error> {
debug!("获取图片真实地址");
let mut response = self.client.get(url).send()?;
debug!("状态码: {}", response.status());
let html = parse_html(response.text()?)?;
Ok(html
.xpath(r#"//img[@id="img"]/@src"#)?
Expand Down
84 changes: 62 additions & 22 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@ use rayon::prelude::*;
use std::{
fs,
io::{Read, Write},
path,
sync::{
atomic::{AtomicU32, Ordering::SeqCst},
Arc,
},
thread::sleep,
time,
};

mod config;
Expand All @@ -28,8 +29,7 @@ mod telegram;
mod telegraph;
mod xpath;

fn run() -> Result<(), Error> {
let config = Config::new("config.toml")?;
fn run(config: &Config) -> Result<(), Error> {
info!("登录中...");
let bot = Bot::new(&config.telegram.token);
let exhentai = ExHentai::new(&config.exhentai.username, &config.exhentai.password)?;
Expand All @@ -40,18 +40,18 @@ fn run() -> Result<(), Error> {
exhentai.search(&config.exhentai.keyword, page).ok()
});

let last_time = if path::Path::new("./LAST_TIME").exists() {
let last_time = if std::path::Path::new("./LAST_TIME").exists() {
let mut s = String::new();
fs::File::open("./LAST_TIME")?.read_to_string(&mut s)?;
s.parse::<DateTime<Local>>()?
} else {
// 默认从一天前开始
Local::now() - Duration::days(1)
// 默认从两天前开始
Local::now() - Duration::days(2)
};
debug!("截止时间: {:?}", last_time);

let galleries = galleries
.flatten()
.into_iter()
// FIXME: 由于时间只精确到分钟, 此处存在极小的忽略掉本子的可能性
.take_while(|gallery| gallery.post_time > last_time)
.collect::<Vec<Gallery>>();
Expand All @@ -73,12 +73,21 @@ fn run() -> Result<(), Error> {
let now = i.load(SeqCst);
info!("第 {} / {} 张图片", now + 1, img_pages.len());
i.store(now + 1, SeqCst);
exhentai
.get_image_url(url)
.and_then(|img_url| upload_by_url(&img_url))
.map(|result| result[0].src.to_owned())
loop {
let img_url = exhentai
.get_image_url(url)
.and_then(|img_url| upload_by_url(&img_url))
.map(|result| result["src"].to_string());
match img_url {
Ok(v) => break Ok(v),
Err(e) => {
error!("获取图片地址失败: {}", e);
sleep(time::Duration::from_secs(10));
},
}
}
})
.collect::<Result<Vec<_>, _>>()?;
.collect::<Result<Vec<String>, Error>>()?;
gallery.img_urls.extend(img_urls);

let content = gallery
Expand All @@ -88,13 +97,23 @@ fn run() -> Result<(), Error> {
.collect::<Vec<_>>()
.join(",");
info!("发布文章");
let article_url = publish_article(
&config.telegraph.access_token,
&gallery.title,
&config.telegraph.author_name,
&config.telegraph.author_url,
&format!("[{}]", content),
)?;

let article_url = loop {
let result = publish_article(
&config.telegraph.access_token,
&gallery.title,
&config.telegraph.author_name,
&config.telegraph.author_url,
&format!("[{}]", content),
);
match result {
Ok(v) => break v,
Err(e) => {
eprintln!("发布文章失败: {}", e);
sleep(time::Duration::from_secs(10));
}
}
};
info!("文章地址: {}", article_url);
bot.send_message(
&config.telegram.channel_id,
Expand All @@ -111,10 +130,31 @@ fn run() -> Result<(), Error> {
}

fn main() {
let config = Config::new("config.toml").unwrap_or_else(|e| {
eprintln!("配置文件解析失败:\n{}", e);
std::process::exit(1);
});

// 设置相关环境变量
if let Some(log_level) = config.log_level.as_ref() {
std::env::set_var("RUST_LOG", format!("exloli={}", log_level));
}
if let Some(threads_num) = config.threads_num.as_ref() {
std::env::set_var("RAYON_NUM_THREADS", threads_num);
}

env_logger::init();

match run() {
Ok(()) => (),
Err(e) => eprintln!("{}", e),
loop {
match run(&config) {
Ok(()) => {
info!("任务完成!");
return;
}
Err(e) => {
error!("任务出错: {}", e);
sleep(time::Duration::from_secs(60));
},
}
}
}
20 changes: 8 additions & 12 deletions src/telegraph.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,11 @@
use failure::Error;
use json::JsonValue;
use reqwest::{multipart::Form, Client, StatusCode};
use serde::Deserialize;
use std::io;
use tempfile::NamedTempFile;

/// 图片上传结果
#[derive(Debug, Deserialize)]
pub struct UploadResult {
/// 图片 URL, 为相对 "telegra.ph" 的地址
pub src: String,
}

/// 通过 URL 上传图片至 telegraph
pub fn upload_by_url(url: &str) -> Result<Vec<UploadResult>, Error> {
pub fn upload_by_url(url: &str) -> Result<JsonValue, Error> {
let client = Client::new();
// 下载图片
debug!("下载图片: {}", url);
Expand All @@ -27,10 +20,10 @@ pub fn upload_by_url(url: &str) -> Result<Vec<UploadResult>, Error> {
.post("https://telegra.ph/upload")
.multipart(form)
.send()?;
let json = response.json()?;
debug!("结果: {:?}", json);
let json = json::parse(&response.text()?)?;
debug!("结果: {}", json);

Ok(json)
Ok(json[0].clone())
}

/// 发布文章, 返回文章地址
Expand Down Expand Up @@ -58,6 +51,9 @@ pub fn publish_article(
return Err(format_err!("{}", text));
}
let json = json::parse(&text)?;
if json["result"]["url"] == JsonValue::Null {
return Err(format_err!("发布文章失败: {}", json));
}
Ok(json["result"]["url"].to_string())
}

Expand Down

0 comments on commit 808aa52

Please sign in to comment.