Skip to content
This repository has been archived by the owner on Feb 17, 2018. It is now read-only.

Commit

Permalink
messed w/ cache
Browse files Browse the repository at this point in the history
  • Loading branch information
riedel committed Dec 30, 2011
1 parent 49fe6d6 commit c455643
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 19 deletions.
2 changes: 1 addition & 1 deletion lib/cache.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ var log = new (require('./log.js').Logger)('cache'),
cfg = require('./cfg.js');

// only require node-compress if cache should use gzip compression
if(cfg.get('cache') && cfg.get('cache').compress) {
if(cfg.get('cache').compress) {
var compress = require('compress');
}

Expand Down
24 changes: 15 additions & 9 deletions lib/ce.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,31 +34,33 @@ var log = new (require('./log.js').Logger)('ce'),
func = require('./func.js'),
cfg = require('./cfg.js'),
Filter = require('./filter.js').Filter,
Cache = require('./cache.js').Cache,
urlopen = require('./urlopen.js');

// external libraries
var readability = require('readability');






/**
 * Constructor for the content extraction object.
 *
 * @param {Object} [settings] - extraction settings; when omitted (or any
 *   falsy value) the 'ce' entry of the configuration is used instead.
 */
var ContentExtraction = function(settings) {
	if (settings) {
		this.settings = settings;
	} else {
		this.settings = cfg.get('ce');
	}

	// counter starts at zero for every new instance
	this.url_count = 0;
};

ContentExtraction.prototype.extract = function(url, content, callback) {
log.debug('extract('+url+')',url);
var cache = new Cache(url, 'rdby');

var cache=null;

if( this.settings.cache === true) {
var Cache = require('./cache.js').Cache,
cache = new Cache(url, 'rdby');
// if readability cache exists, just use it, no filtering and extraction
if(cache.exists() && this.settings.cache === true) {
if(cache.exists()) {
callback(null, cache.read(), url);
return;
}
else {
}

{
var filter = new Filter(url);
// TODO: perfect place for Step?
// pre-content extraction filter (before the extraction)
Expand All @@ -75,13 +77,17 @@ ContentExtraction.prototype.extract = function(url, content, callback) {

// post-content extraction filter (after the extraction)
filter.postFilter(info.content, function(error, post_filtered_content) {
log.debug('callback from postFilter('+pre_filtered_content.length+'):'+url, url);
if(error || !post_filtered_content) {
return callback(error);
}

// notice that the title (that is used only in single article
// requests) is not cached (the cache is deactivated)
cache.write(post_filtered_content);
if(cache) {
cache.write(post_filtered_content);
}

callback(null, post_filtered_content, url, info.title);

}); // end post filter
Expand Down
2 changes: 1 addition & 1 deletion lib/feed.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ var log = new (require('./log.js').Logger)('feed'),
func = require('./func.js'),
cfg = require('./cfg.js'),
entity = require('./entity.js'),
cache = require('./cache.js'),
//cache = require('./cache.js'),
urlopen = require('./urlopen.js');

// external libraries
Expand Down
1 change: 0 additions & 1 deletion lib/filter.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ var fs = require('fs'),
var log = new (require('./log.js').Logger)('filter'),
func = require('./func.js'),
cfg = require('./cfg.js'),
Cache = require('./cache.js').Cache,
urlopen = require('./urlopen.js');

// external libraries
Expand Down
4 changes: 2 additions & 2 deletions lib/log.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,12 @@ Logger.levels = {
// Logger options.
// NOTE(review): `file` and `console_level` each appear twice in this view,
// presumably the before/after lines of a diff; in an object literal the
// later entry takes effect. Confirm against the actual file.
Logger.options = {
console: true, // whether log messages are written to the console
stderr: true, // log messages are logged to stderr (blocking!)
file: false, // activates file logging
file: true, // activates file logging
file_seperate: true, // separate log files per log level
syncronized: false, // use sync methods for writing to log files
use_domain: true, // write a separate log file for each domain
path: './logs', // write log files in this directory
console_level: 3, // 0: no logging 1: error 2: warn 3: info 4: debug
console_level: 4, // 0: no logging 1: error 2: warn 3: info 4: debug
file_level: 4, // presumably the same level scale as console_level - confirm
modules: null // array of modules to log (other modules are ignored)
};
Expand Down
7 changes: 4 additions & 3 deletions lib/urlopen.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ var http = require('http'),
// Project modules used by urlopen.
var log = new (require('./log.js').Logger)('urlopen'),
	func = require('./func.js'),
	cfg = require('./cfg.js'),
	// Cache starts out as null and is required lazily inside open() when
	// settings.cache === true, so ./cache.js is not loaded at module load.
	// The entry must end with a comma: a semicolon here would terminate the
	// var statement and turn `cookie` below into an implicit global.
	Cache = null,
	cookie = require('./cookie.js');

// external libraries
Expand Down Expand Up @@ -148,6 +148,7 @@ exports.open = function(url, settings, callback) {
}

if(settings.cache === true) {
if(!Cache) Cache=require('./cache.js').Cache;
var response_cache = new Cache(url, 'json');
var data_cache = new Cache(url, 'raw');

Expand Down Expand Up @@ -185,7 +186,7 @@ exports.open = function(url, settings, callback) {
exports.open(redirect_url, settings, callback);
}
else if(response.statusCode == 304) { // 304 Not Modified
if(!response_cache.exists() || !data_cache.exists()) {
if( settings.cache != true || !response_cache.exists() || !data_cache.exists()) {
var error = 'received not modified, but no cache availible';
log.error(error, url);
return callback(error);
Expand Down Expand Up @@ -247,7 +248,7 @@ exports.open = function(url, settings, callback) {
}

// caching
if(response_cache && data_cache) {
if(settings.cache === true && response_cache && data_cache) {
response_cache.write({
data_length: response.data_length,
url: response.url,
Expand Down
4 changes: 2 additions & 2 deletions settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
}
},

"no-cache":
"cache":
{
"path": "./cache"
},
Expand All @@ -92,7 +92,7 @@
"file_seperate": false,
"syncronized": false,
"path": "./logs",
"console_level": 3,
"console_level": 1,
"file_level": 4
}

Expand Down

0 comments on commit c455643

Please sign in to comment.