2011-11-28 13:47:38 +00:00
|
|
|
/*
|
|
|
|
FileRetriever can asynchronously retrieve files from HTTP URLs or the local file system. It incorporates
|
|
|
|
throttling so that we don't get error EMFILE "Too many open files".
|
|
|
|
*/
|
|
|
|
|
2011-12-09 16:34:02 +00:00
|
|
|
/*jslint node: true */
|
2011-11-30 17:27:00 +00:00
|
|
|
"use strict";
|
|
|
|
|
2011-11-28 13:47:38 +00:00
|
|
|
var fs = require("fs"),
|
2011-11-28 15:15:35 +00:00
|
|
|
path = require("path"),
|
2011-11-29 18:27:03 +00:00
|
|
|
url = require("url"),
|
|
|
|
util = require("util"),
|
|
|
|
http = require("http"),
|
2011-12-02 14:40:18 +00:00
|
|
|
https = require("https");
|
2011-11-28 13:47:38 +00:00
|
|
|
|
|
|
|
// FileRetriever is an alias for this module's exports object, so every member assigned
// to it below becomes part of the module's public interface
var FileRetriever = exports;
|
|
|
|
|
2011-12-02 14:40:18 +00:00
|
|
|
// Fetch a file from the local file system as UTF-8 text.
//	filepath: location of the file to read
//	callback: handed straight to fs.readFile, so it is invoked as callback(err,text)
var fileRequest = function fileRequest(filepath,callback) {
	var textEncoding = "utf8";
	fs.readFile(filepath,textEncoding,callback);
};
|
2011-11-28 13:47:38 +00:00
|
|
|
|
2011-12-02 14:40:18 +00:00
|
|
|
// Retrieve the text behind an HTTP or HTTPS URL.
//	fileurl: absolute URL; the "http:" protocol selects the http module, anything else goes through https
//	callback: invoked exactly once, as callback(err,text)
// A response status other than 200 is reported as an Error("HTTP error") whose `code` property holds
// the status code as a string. Request and response stream errors are passed through unchanged.
var httpRequest = function(fileurl,callback) {
	var opts = url.parse(fileurl),
		httpLib = opts.protocol === "http:" ? http : https,
		finished = false,
		// Several events can signal completion (response end, response error, request error);
		// only the first of them is allowed to reach the caller
		finish = function(err,text) {
			if(!finished) {
				finished = true;
				callback(err,text);
			}
		};
	// http.get() ends the request itself, so no explicit request.end() is needed
	var request = httpLib.get(opts,function(res) {
		if(res.statusCode !== 200) {
			var err = new Error("HTTP error");
			err.code = res.statusCode.toString();
			// Discard the unwanted body so that the underlying socket is released
			res.resume();
			finish(err);
		} else {
			var data = [];
			// Decode at the stream level: converting each Buffer chunk to a string on its own
			// corrupts any multi-byte character that straddles a chunk boundary
			res.setEncoding("utf8");
			res.on("data", function(chunk) {
				data.push(chunk);
			});
			res.on("error", finish);
			res.on("end", function() {
				finish(null,data.join(""));
			});
		}
	});
	request.on("error", finish);
};
|
2011-11-29 18:27:03 +00:00
|
|
|
|
2011-11-28 13:47:38 +00:00
|
|
|
// Retrieve a file given a filepath specifier and a context path. If the filepath isn't an absolute
// filepath or an absolute URL, then it is interpreted relative to the context path, which can also be
// a filepath or a URL. If either of them is an http:// or https:// URL the file is fetched over HTTP,
// otherwise it is read from the local file system. On completion, the callback function is called as
// callback(err,data). The data hashmap is as follows:
//	text: full text of the file (only present when the retrieval succeeded)
//	path: full path or URL used to reach the file
//	basename: the basename of the file, without its extension
//	extname: the extension of the file, including the leading dot
// The data hashmap is passed to the callback even when err is set, so that the caller can report
// which path failed.
FileRetriever.retrieveFile = function(filepath,contextPath,callback) {
	// Deliberately no "g" flag: a global regexp remembers lastIndex between test() calls, which would
	// make the second test below start part-way through its string whenever the first one matched
	var httpRegExp = /^(https?:\/\/)/i,
		result = {},
		filepathIsHttp = httpRegExp.test(filepath),
		contextPathIsHttp = httpRegExp.test(contextPath),
		requester;
	if(contextPathIsHttp || filepathIsHttp) {
		// At least one side is a full HTTP URI, so resolve the pair as URLs
		result.path = url.resolve(contextPath,filepath);
		// The name and extension come from the URL's pathname only, so any query string or
		// fragment is ignored
		var parsedPath = url.parse(result.path);
		result.extname = path.extname(parsedPath.pathname);
		result.basename = path.basename(parsedPath.pathname,result.extname);
		requester = httpRequest;
	} else {
		// It's a file requested in a file context: resolve against the context file's directory
		result.path = path.resolve(path.dirname(contextPath),filepath);
		result.extname = path.extname(result.path);
		result.basename = path.basename(result.path,result.extname);
		requester = fileRequest;
	}
	requester(result.path,function(err,data) {
		if(!err) {
			result.text = data;
		}
		callback(err,result);
	});
};
|