Skip to content

Commit

Permalink
v0.0.1
Browse files Browse the repository at this point in the history
  • Loading branch information
fanpei91 committed Feb 28, 2016
1 parent 938cef1 commit e9f5990
Show file tree
Hide file tree
Showing 8 changed files with 559 additions and 33 deletions.
37 changes: 5 additions & 32 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,33 +1,6 @@
# Logs
logs
*.log
npm-debug.log*

# Runtime data
pids
*.pid
*.seed

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage

# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# node-waf configuration
.lock-wscript

# Compiled binary addons (http://nodejs.org/api/addons.html)
build/Release

# Dependency directory
node_modules

# Optional npm cache directory
.npm

# Optional REPL history
.node_repl_history
.sync-config.cson
demo.js
.idea
.gitignore
.DS_Store
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
SOFTWARE.
40 changes: 40 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
## 介绍
p2pspider 是一个 DHT 爬虫 + BT Client 的结合体, 从全球 DHT 网络里"嗅探"人们正在下载的资源, 并把资源的`metadata`(种子的主要信息)从 远程 BT 客户端下载, 并生成资源磁力链接. 通过磁力链接, 你就可以下载到资源文件.

## 用途
你可以使用 p2pspider 打造私人种子库, 也可以拿它做资源数据挖掘与分析.

## 安装
```
git clone https://github.com/Fuck-You-GFW/p2pspider
```

## 使用
初次使用前, 请执行`npm install`安装依赖包. 执行`node example.js`便可运行程序. **建议放在公网 VPS 上运行, 在局域网里几乎没效果.**

## 定制
你可以修改`example.js`文件对爬取到的数据进行处理/保存. 详情请看`example.js`的例子, 很简单滴.

## 待做
>* 完全 ES6 化
>* 效率优化
>* 数据保存
>* 跨平台 GUI 化
>* 数据共享
>* 资源下载
>* 视频流媒体播放
## 目标
打造成人人都能用的神器, 可以用它搜索种子; 下载资源; 共享数据库; 如果是视频, 可边下载边播放; 打造成分布式快播是可以有滴. :)

## 感谢
在开发这个项目时, 从 [bittorrent-protocol](https://github.com/feross/bittorrent-protocol) 和 [ut_metadata](https://github.com/feross/ut_metadata) 借鉴了一些实现代码. 非常感谢其作者 [@feross](https://github.com/feross) 指点迷津.

## 交流
如果你对此项目感兴趣, 不管你是不是开发者, 都可加 QQ 群(145234507)进行实时交流. 虽然 QQ 群看起来很 Low, 但不得不说, 特别适合快速交流.

## 提醒
不要把这个爬虫爬取到的数据分享到互联网, 因为其中有很多敏感资源、你懂滴资源和侵权资源. 否则后果自负喔.

## 许可证
MIT
25 changes: 25 additions & 0 deletions example.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"use strict"

var DHTSpider = require('./p2pspider/dhtspider');
var BTClient = require('./p2pspider/btclient');

var btclient = new BTClient({ timeout: 1000 * 10 });

// 'complete' delivers the decoded metadata, the torrent's infohash and the
// address of the peer the metadata was downloaded from.
btclient.on('complete', (metadata, infohash, rinfo) => {

    // metadata.info holds the resource name, the resource size, the file
    // list and similar information.
    var info = metadata.info;

    // Torrents that carry an explicit UTF-8 name conventionally store it
    // under `name.utf-8`. The original 'utf-8.name' lookup is kept as a last
    // fallback so nothing that worked before stops working.
    var name = info.name || info['name.utf-8'] || info['utf-8.name'];
    if (name) {
        console.log('\n');
        console.log('name: %s', name.toString());
        console.log('from: %s:%s', rinfo.address, rinfo.port );
        console.log('link: magnet:?xt=urn:btih:%s', infohash.toString('hex'));
    }
});

// Settings handed to the spider: the metadata client created above, the
// local address/port to use, and a cap on the node table size.
// NOTE(review): how DHTSpider uses `btclient` and `nodesMaxSize` is defined in
// ./p2pspider/dhtspider, which is not visible here.
var spiderOptions = {
    btclient: btclient,
    address: '0.0.0.0',
    port: 6219,
    nodesMaxSize: 4000
};

DHTSpider.start(spiderOptions);
261 changes: 261 additions & 0 deletions p2pspider/btclient.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
'use strict'

const EventEmitter = require('events');
const stream = require('stream');
const crypto = require('crypto');
const util = require('util');
const net = require('net');

const BitField = require('bitfield');
const bencode = require('bencode');
const LRU = require('lru');

const utils = require('./utils');

// Reserved bytes sent in our handshake. Bit 0x10 of byte 5 is the same flag
// _onHandshake looks for in the peer's reply before sending the extended
// handshake. The 0x01 in byte 7 is presumably the DHT-support flag (TODO confirm).
const BT_RESERVED = new Buffer([0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x01]);

// Protocol identifier that opens every handshake.
const BT_PROTOCOL = new Buffer('BitTorrent protocol');

// Size of one metadata piece in bytes (16 KiB).
const PIECE_LENGTH = 1 << 14;

// Peers that announce more metadata than this many bytes are ignored.
const MAX_METADATA_SIZE = 10 * 1000 * 1000;

// Extended-message sub-id reserved for the extension handshake.
const EXT_HANDSHAKE_ID = 0;

// Passed as the `grow` option of every BitField created by Wire.
const BITFIELD_GROW = 1000;

// Message id that marks an "extended" message; every other id is ignored.
const BT_MSG_ID = 20;

// Module-wide cache of infohashes that download() has already attempted.
// NOTE(review): maxAge is assumed to be in milliseconds (10 minutes) - confirm
// against the `lru` package.
const lru = LRU({ max: 100000, maxAge: 10 * 60 * 1000 });

/**
 * Duplex stream that parses incoming peer-protocol bytes (written to it) and
 * produces outgoing bytes (readable from it) in order to fetch the metadata
 * for one infohash. Emits 'metadata' once the downloaded data matches the
 * infohash.
 *
 * @param {Buffer} infohash - infohash of the torrent whose metadata is wanted.
 * @param {number} timeout - accepted for the caller's convenience; not used
 *     anywhere inside Wire.
 */
const Wire = function(infohash, timeout) {
    stream.Duplex.call(this);

    // What is being fetched, and which metadata pieces have arrived.
    this._infohash = infohash;
    this._bitfield = new BitField(0, { grow: BITFIELD_GROW });

    // Metadata transfer state; filled in by _onExtHandshake/_requestPieces.
    this._ut_metadata = null;
    this._metadataSize = null;
    this._numPieces = 0;
    this._metadata = null;

    // Bytes received but not yet consumed by the parser.
    this._bufferSize = 0;
    this._buffer = [];

    // Parser state: the handler to call once `_nextSize` bytes are buffered.
    this._nextSize = 0;
    this._next = null;

    // Arm the parser for the peer's handshake.
    this._onHandshake();
};

util.inherits(Wire, stream.Duplex);

/**
 * Parser state: handles the 4-byte big-endian length prefix of a message.
 *
 * @param {Buffer} buffer - exactly the 4 prefix bytes.
 */
Wire.prototype._onMessageLength = function (buffer) {
    var bodyLength = buffer.readUInt32BE(0);
    if (bodyLength === 0) {
        // Zero-length message: nothing to read, so the parser stays in this
        // state and waits for the next length prefix.
        return;
    }
    this._register(bodyLength, this._onMessage);
};

/**
 * Parser state: handles one complete message body. Only extended messages
 * (first byte BT_MSG_ID) are processed; every other message is dropped.
 *
 * @param {Buffer} buffer - the message body, without its length prefix.
 */
Wire.prototype._onMessage = function (buffer) {
    // Whatever this message is, the next thing on the wire is a length prefix.
    this._register(4, this._onMessageLength);

    // An extended message needs at least the message id and the extended id.
    // Without the length check, a 1-byte message containing only BT_MSG_ID
    // makes readUInt8(1) throw a RangeError that nothing catches.
    if (buffer.length < 2 || buffer[0] !== BT_MSG_ID) {
        return;
    }
    this._onExtended(buffer.readUInt8(1), buffer.slice(2));
};

/**
 * Dispatches an extended message: id 0 is the extension handshake, any other
 * id is treated as a metadata piece.
 *
 * @param {number} ext - extended message id.
 * @param {Buffer} buf - payload that follows the extended message id.
 */
Wire.prototype._onExtended = function(ext, buf) {
    if (ext !== 0) {
        this._onPiece(buf);
        return;
    }

    try {
        this._onExtHandshake(bencode.decode(buf));
    }
    catch (err) {
        // Best effort: a handshake that cannot be decoded or processed is
        // simply ignored.
    }
};

/**
 * Arms the parser: once `size` bytes are buffered, _write hands them to `next`.
 *
 * @param {number} size - number of bytes the next handler needs.
 * @param {function(Buffer)} next - handler invoked with exactly `size` bytes.
 */
Wire.prototype._register = function (size, next) {
    this._next = next;
    this._nextSize = size;
};

/**
 * Ends the writable side by delegating, with all arguments unchanged, to
 * stream.Duplex's own end().
 */
Wire.prototype.end = function() {
    var duplexEnd = stream.Duplex.prototype.end;
    duplexEnd.apply(this, arguments);
};

/**
 * Arms the parser for the peer's handshake: one length byte, then the
 * protocol string followed by 48 more bytes. If the protocol string matches,
 * the extended handshake is sent when the peer's reserved bytes carry the
 * 0x10 flag in byte 5, and parsing switches to length-prefixed messages.
 */
Wire.prototype._onHandshake = function() {
    this._register(1, (lengthByte) => {
        var pstrlen = lengthByte.readUInt8(0);

        this._register(pstrlen + 48, (handshake) => {
            var pstr = handshake.slice(0, pstrlen).toString();
            if (pstr !== BT_PROTOCOL.toString()) {
                // Not a BitTorrent peer: stop writing to this wire.
                this.end();
                return;
            }

            // Everything after the protocol string; its first 8 bytes are
            // the reserved flags.
            var rest = handshake.slice(pstrlen);
            if (rest[5] & 0x10) {
                this._sendExtHandshake();
            }
            this._register(4, this._onMessageLength);
        });
    });
};

/**
 * Handles the peer's decoded extension handshake. When the peer offers
 * ut_metadata and announces a plausible metadata size, records both and
 * requests every metadata piece.
 *
 * Both fields come straight from an untrusted peer, so they are validated as
 * integers in range. Previously a non-integer `metadata_size` (for example a
 * bencoded string) slipped through and produced a NaN piece count, and a
 * missing `m` dictionary threw, relying on the caller's try/catch.
 *
 * @param {Object} extHandshake - bencode-decoded handshake dictionary.
 */
Wire.prototype._onExtHandshake = function(extHandshake) {
    var extensions = extHandshake && extHandshake.m;
    var metadataSize = extHandshake && extHandshake.metadata_size;
    var utMetadata = extensions && extensions.ut_metadata;

    if (!Number.isInteger(metadataSize) || metadataSize <= 0
        || metadataSize > MAX_METADATA_SIZE) {
        return;
    }
    // The extended message id is sent as a single byte by _requestPiece, and
    // 0 is the handshake id, so only 1..255 can be addressed.
    if (!Number.isInteger(utMetadata) || utMetadata <= 0 || utMetadata > 255) {
        return;
    }

    this._metadataSize = metadataSize;
    this._numPieces = Math.ceil(metadataSize / PIECE_LENGTH);
    this._ut_metadata = utMetadata;

    this._requestPieces();
};

/**
 * Allocates the buffer that will hold the whole metadata and sends a request
 * for each of its pieces.
 */
Wire.prototype._requestPieces = function() {
    this._metadata = new Buffer(this._metadataSize);

    var index = 0;
    while (index < this._numPieces) {
        this._requestPiece(index);
        index += 1;
    }
};

/**
 * Sends a request (msg_type 0) for one metadata piece, addressed to the
 * extended message id the peer announced for ut_metadata.
 *
 * @param {number} piece - zero-based index of the piece to request.
 */
Wire.prototype._requestPiece = function(piece) {
    var header = new Buffer([BT_MSG_ID, this._ut_metadata]);
    var request = bencode.encode({msg_type: 0, piece: piece});
    this._sendMessage(Buffer.concat([header, request]));
};

/**
 * Queues raw bytes on the readable side of this Duplex. download() pipes the
 * wire into the socket, so whatever is pushed here is what gets sent to the
 * peer.
 *
 * @param {Buffer} packet - bytes to send, already fully framed.
 */
Wire.prototype._sendPacket = function(packet) {
this.push(packet);
};

/**
 * Frames a message with its 4-byte big-endian length prefix and sends it.
 *
 * @param {Buffer} msg - message body (message id plus payload).
 */
Wire.prototype._sendMessage = function(msg) {
    var lengthPrefix = new Buffer(4);
    lengthPrefix.writeUInt32BE(msg.length, 0);

    var framed = Buffer.concat([lengthPrefix, msg]);
    this._sendPacket(framed);
};

/**
 * Sends our handshake: protocol-string length, protocol string, reserved
 * bytes, the wanted infohash and a peer id obtained from utils.randomID().
 */
Wire.prototype.sendHandshake = function() {
    var parts = [
        new Buffer([BT_PROTOCOL.length]),
        BT_PROTOCOL,
        BT_RESERVED,
        this._infohash,
        utils.randomID()
    ];
    this._sendPacket(Buffer.concat(parts));
};

/**
 * Sends our extension handshake, announcing ut_metadata under extended
 * message id 1.
 */
Wire.prototype._sendExtHandshake = function() {
    var header = new Buffer([BT_MSG_ID, EXT_HANDSHAKE_ID]);
    var body = bencode.encode({m: {ut_metadata: 1}});
    this._sendMessage(Buffer.concat([header, body]));
};

/**
 * Handles a ut_metadata message: a bencoded header dictionary immediately
 * followed by the raw piece bytes. Data messages (msg_type 1) are copied into
 * the metadata buffer at the piece's offset, then completion is checked.
 *
 * Everything here comes from an untrusted peer. The previous version threw
 * uncaught exceptions when a piece arrived before the extended handshake had
 * allocated the buffer, or when `piece` was negative or beyond the announced
 * size. It also converted the whole message to a string just to find the end
 * of the header.
 *
 * @param {Buffer} piece - payload following the extended message id.
 */
Wire.prototype._onPiece = function(piece) {
    // No buffer yet means no extended handshake was accepted, so the piece
    // cannot be placed anywhere.
    if (!this._metadata) {
        return;
    }

    var dict, trailer;
    try {
        // The header is a dictionary whose last value is an integer, so it
        // ends at the first 'ee'. Searching the Buffer itself keeps the
        // result a byte offset regardless of what the binary trailer holds.
        var headerEnd = piece.indexOf('ee');
        if (headerEnd === -1) {
            return;
        }
        var trailerIndex = headerEnd + 2;
        dict = bencode.decode(piece.slice(0, trailerIndex));
        trailer = piece.slice(trailerIndex);
    }
    catch (err) {
        return;
    }

    // Only data messages carry a piece.
    if (!dict || dict.msg_type !== 1) {
        return;
    }

    // Reject indexes outside the announced piece range. An out-of-range
    // target offset would make Buffer#copy throw.
    var index = dict.piece;
    if (!Number.isInteger(index) || index < 0 || index >= this._numPieces) {
        return;
    }
    if (trailer.length > PIECE_LENGTH) {
        return;
    }

    trailer.copy(this._metadata, index * PIECE_LENGTH);
    this._bitfield.set(index);
    this._checkDone();
};

/**
 * Calls _onDone with the assembled metadata once every piece index below
 * _numPieces is marked in the bitfield; otherwise does nothing.
 */
Wire.prototype._checkDone = function () {
    for (var index = 0; index < this._numPieces; index++) {
        if (!this._bitfield.get(index)) {
            // At least one piece is still missing.
            return;
        }
    }
    this._onDone(this._metadata);
};

/**
 * Verifies the assembled metadata against the infohash and, on a match,
 * emits 'metadata' with `{info: <decoded dictionary>}` and the infohash.
 *
 * If the decoded data has an `info` key, that sub-dictionary is re-encoded and
 * used for hashing; otherwise the data is hashed as received.
 *
 * @param {Buffer} metadata - the fully assembled metadata bytes.
 * @returns {boolean|undefined} false when the SHA-1 does not match the
 *     infohash, undefined in every other case.
 */
Wire.prototype._onDone = function(metadata) {
    var payload = metadata;
    try {
        var decoded = bencode.decode(metadata);
        if (decoded.info) {
            payload = bencode.encode(decoded.info);
        }
    }
    catch (err) {
        // Data that cannot be decoded is dropped.
        return;
    }

    var digest = crypto.createHash('sha1').update(payload).digest('hex');
    if (digest != this._infohash.toString('hex')) {
        return false;
    }

    var result = {info: bencode.decode(payload)};
    this.emit('metadata', result, this._infohash);
};

/**
 * Writable-side implementation: buffers incoming bytes and, while enough are
 * available, hands exactly `_nextSize` of them to the registered handler.
 *
 * @param {Buffer} buf - chunk received from the peer.
 * @param {string} encoding - unused.
 * @param {function} next - called with null once the chunk has been consumed.
 */
Wire.prototype._write = function (buf, encoding, next) {
    this._buffer.push(buf);
    this._bufferSize += buf.length;

    while (this._bufferSize >= this._nextSize) {
        // Capture the size first: the handler re-registers and may change
        // _nextSize for the following iteration.
        var wanted = this._nextSize;
        var pending = Buffer.concat(this._buffer);

        this._bufferSize -= wanted;
        if (this._bufferSize) {
            this._buffer = [pending.slice(wanted)];
        } else {
            this._buffer = [];
        }

        this._next(pending.slice(0, wanted));
    }

    next(null);
};

// Intentionally empty: outgoing data is pushed by _sendPacket whenever there
// is something to send, not produced on demand.
Wire.prototype._read = function() {};


/**
 * Event emitter that downloads torrent metadata from peers. Emits
 * 'complete' (metadata, infohash, rinfo) for every successful download.
 *
 * @param {Object} [options]
 * @param {number} [options.timeout] - socket idle timeout in milliseconds;
 *     download() falls back to 5000 when it is not set.
 */
const BTClient = function(options) {
    EventEmitter.call(this);

    // Tolerate `new BTClient()`: reading `.timeout` off undefined used to
    // throw a TypeError.
    var settings = options || {};
    this.timeout = settings.timeout;
};

util.inherits(BTClient, EventEmitter);

/**
 * Connects to a peer and tries to download the metadata for `infohash`.
 * Emits 'complete' (metadata, infohash, rinfo) on success. Failures and
 * timeouts just close the connection. An infohash that was attempted
 * recently (still in the module-level LRU cache) is skipped.
 *
 * @param {{address: string, port: number}} rinfo - peer to connect to.
 * @param {Buffer} infohash - infohash of the wanted torrent.
 */
BTClient.prototype.download = function(rinfo, infohash) {
    // Key the cache by the hex form. Passing the Buffer itself presumably
    // coerces it through its default UTF-8 toString(), which is lossy for
    // arbitrary bytes, so distinct infohashes could share a key.
    var cacheKey = infohash.toString('hex');
    if ( lru.get(cacheKey) ) {
        return;
    }
    lru.set(cacheKey, true);

    var socket = new net.Socket();
    socket.setTimeout(this.timeout || 5000);
    socket.connect(rinfo.port, rinfo.address, () => {
        var wire = new Wire(infohash, this.timeout);
        socket.pipe(wire).pipe(socket);

        // `once`: a peer re-sending pieces must not produce a second
        // 'complete' for the same connection.
        wire.once('metadata', (metadata, infoHash) => {
            // Nothing more is needed from this peer. Close right away
            // instead of keeping the socket open until it goes idle.
            socket.destroy();
            this.emit('complete', metadata, infoHash, rinfo);
        });
        wire.sendHandshake();
    });

    socket.on('error', () => {
        socket.destroy();
    });
    socket.on('timeout', () => {
        socket.destroy();
    });
};

module.exports = BTClient;
Loading

0 comments on commit e9f5990

Please sign in to comment.