Reading a file in real-time using Node.js

If you want to keep the file as a persistent store of your data, so that nothing is lost if the system crashes or one of the processes in your pipeline dies, then you can indeed keep writing to the file from one process and reading from it in another.

If you do not need the file as persistent storage for the results produced by your Java process, then a Unix socket is much better, both for ease of use and for performance.
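
For illustration, here is a minimal sketch of that socket alternative using Node's net module (the socket path /tmp/test-read-write.sock is just an example; your Java process would connect to the same path as a client and write its output there instead of to a file):

var net = require('net');

var server = net.createServer(function (connection) {
    // Each chunk arrives as soon as the producer writes it; no polling needed
    connection.on('data', function (chunk) {
        console.log('Received:', chunk.toString('utf-8'));
    });
});

server.listen('/tmp/test-read-write.sock', function () {
    console.log('Listening on /tmp/test-read-write.sock');
});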

fs.watchFile() is not what you need, because it works on file stats as the filesystem reports them, and since you want to read the file while it is still being written, stat-based polling is not what you want.

SHORT UPDATE: I am very sorry to realize that although I accused fs.watchFile() of relying on file stats in the previous paragraph, I did the very same thing in my example code below! Although I already warned readers to “take care!” because I wrote it in just a few minutes without testing it well, it can still be done better by using fs.watch() instead of watchFile or fstatSync, if the underlying system supports it.
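
For completeness, here is a minimal sketch of that fs.watch() approach (note that fs.watch() requires the file to already exist, and that a 'change' event may fire more than once per write, so you still keep track of your own read offset just like readbytes below):

var fs = require('fs');

// Treat 'change' events only as a hint to try reading again from the last
// known offset; on some platforms events may be coalesced or duplicated.
fs.watch('test-read-write.txt', function (eventType) {
    if (eventType === 'change') {
        // call something like readsome() from the example below here,
        // instead of polling with fstatSync
    }
});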

For reading from and writing to a file, here is what I put together for fun during my break:

test-fs-writer.js: [You will not need this, since you write the file from your Java process]

var fs = require('fs'),
    lineno = 0;

// Open with the 'a' flag so we append instead of truncating an existing file
var stream = fs.createWriteStream('test-read-write.txt', { flags: 'a' });

stream.on('open', function () {
    console.log('Stream opened, will start writing in 2 secs');
    setInterval(function () { stream.write((++lineno) + ' oi!\n'); }, 2000);
});

test-fs-reader.js: [Take care, this is just a demonstration; check the err objects!]

var fs = require('fs'),
    bite_size = 256,
    readbytes = 0,
    file;

fs.open('test-read-write.txt', 'r', function (err, fd) {
    if (err) throw err; // as promised: check your err objects!
    file = fd;
    readsome();
});

function readsome() {
    var stats = fs.fstatSync(file); // yes, sometimes async does not make sense!
    if (stats.size < readbytes + 1) {
        console.log('Hehe I am much faster than your writer..! I will sleep for a while, I deserve it!');
        setTimeout(readsome, 3000);
    }
    else {
        fs.read(file, Buffer.alloc(bite_size), 0, bite_size, readbytes, processsome);
    }
}

function processsome(err, bytecount, buff) {
    console.log('Read', bytecount, 'and will process it now.');

    // Here we will process our incoming data:
    // Do whatever you need, just be careful not to use more than bytecount bytes of buff.
    console.log(buff.toString('utf-8', 0, bytecount));

    // So we continue reading from where we left off:
    readbytes += bytecount;
    process.nextTick(readsome);
}

You can safely skip nextTick and call readsome() directly instead. Since processsome() is always invoked from an asynchronous callback anyway, nextTick is not necessary in any sense. I just like it. :p

EDIT by Oliver Lloyd

Taking the example above and extending it to read CSV data gives:

var valueArray = [], // parsed rows, for use elsewhere
    lastLineFeed,
    lineArray;

function processsome(err, bytecount, buff) {
    lastLineFeed = buff.toString('utf-8', 0, bytecount).lastIndexOf('\n');

    if (lastLineFeed > -1) {

        // Split the buffer by line
        lineArray = buff.toString('utf-8', 0, bytecount).slice(0, lastLineFeed).split('\n');

        // Then split each line by comma
        for (var i = 0; i < lineArray.length; i++) {
            // Add read rows to an array for use elsewhere
            valueArray.push(lineArray[i].split(','));
        }

        // Set a new position to read from: just past the last complete line,
        // so a partial line at the end of the buffer is re-read next time
        readbytes += lastLineFeed + 1;
    } else {
        // No complete lines were read
        readbytes += bytecount;
    }
    process.nextTick(readsome);
}
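
The collected rows can then be consumed wherever you need them, for example like this (treating row[0] as the first CSV column is just an assumption about your data layout):

// Drain the rows gathered so far
var row;
while ((row = valueArray.shift()) !== undefined) {
    console.log('first column:', row[0]);
}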
