commit b9771957a18aa09dfe287d017d9accda4b21f0aa Author: Robert M Ochshorn Date: Sat Feb 1 17:35:02 2014 +0100 store media ids in redis diff --git a/mwoffliner/mwoffliner.js b/mwoffliner/mwoffliner.js index aff8735..f63d84c 100755 --- a/mwoffliner/mwoffliner.js +++ b/mwoffliner/mwoffliner.js @@ -89,7 +89,6 @@ var name = ''; var lang = 'en'; var articleIds = {}; var namespaces = {}; -var mediaIds = {}; var webUrl = hostUrl + 'wiki/'; var apiUrl = hostUrl + 'w/api.php?'; @@ -123,7 +122,9 @@ var redis = require("redis"); /* Setup redis client */ var redisClient = redis.createClient("/tmp/redis.sock"); var redisRedirectsDatabase = Math.floor( ( Math.random() * 10000000 ) + 1 ) + "redirects"; +var redisMediaIdsDatabase = Math.floor( ( Math.random() * 10000000 ) + 1 ) + "mediaIds"; redisClient.expire( redisRedirectsDatabase, 60 * 60 *24 * 30, function( error, result) {}); +redisClient.expire( redisMediaIdsDatabase, 60 * 60 *24 * 30, function( error, result) {}); /* Compile templates */ var redirectTemplate = swig.compile( redirectTemplateCode ); @@ -911,13 +912,17 @@ function downloadMedia( url ) { var filenameBase = (parts[2].length > parts[5].length ? parts[2] : parts[5] + parts[6] + ( parts[7] || '' )); var width = parseInt( parts[4].replace( /px\-/g, '' ) ) || 9999999; - if ( mediaIds[ filenameBase ] && mediaIds[ filenameBase ] >= width ) { - return; - } else { - mediaIds[ filenameBase ] = width; - } - - downloadFile( url, getMediaPath( url ), true ); + redisClient.hget( redisMediaIdsDatabase, filenameBase, function(error, r_width) { + if( error || r_width < width) { + // Download this image & update DB + console.info("mediaId cache miss: " + filenameBase); + downloadFile( url, getMediaPath( url ), true ); + redisClient.hset( redisMediaIdsDatabase, filenameBase, width ); + } + else { + console.info("mediaId cache hit: " + filenameBase); + } + }); } process.on( 'uncaughtException', function( error ) {