故障表现:服务器CPU占用100%,导致无法处理任何请求,进程假死。
错误日志文件中出现大量写日志失败和 reloaded 的错误日志:
查看每分钟的 reload 次数,在3万次左右(基本确定是这个原因导致CPU打满):
写一个测试js文件写入相同日志文件,写入成功,排除文件占用和权限问题:
// Standalone check: append one line to the same log file the application uses,
// to rule out file-locking and permission problems.
const fs = require('fs');

const filename = '/root/logs/page-builder-client/page-builder-client-kafka.log';
const stream = fs.createWriteStream(filename, { flags: 'a' });

// Register a real 'error' listener. Assigning a function to `stream._onError`
// does not subscribe it to anything, and a module-level arrow function has no
// logger instance behind `this`, so report against `filename` directly.
stream.on('error', (err) => {
  console.error('%s ERROR %s [egg-logger] [%s] %s',
    Date(), process.pid, filename, err.stack);
});

stream.write('this is a test input.', 'utf-8');
问题:写入失败是什么原因导致的?reload 的重试是否没有次数限制,从而耗尽了 CPU 资源?这种问题应该怎么处理? 相关 issue:https://github.com/eggjs/egg/issues/4533
附上定位问题查到的相关源代码:
/**
* reload file stream
*/
reload() {
this._closeStream();
this._stream = this._createStream();
}
/**
* create stream
* @return {Stream} return writeStream
* @private
*/
_createStream() {
mkdirp.sync(path.dirname(this.options.file));
const stream = fs.createWriteStream(this.options.file, { flags: 'a' });
const onError = err => {
console.error('%s ERROR %s [egg-logger] [%s] %s',
utility.logDate(','), process.pid, this.options.file, err.stack);
this.reload();
console.warn('%s WARN %s [egg-logger] [%s] reloaded', utility.logDate(','), process.pid, this.options.file);
};
// only listen error once because stream will reload after error
stream.once('error', onError);
stream._onError = onError;
return stream;
}
/**
* close stream
* @private
*/
_closeStream() {
if (this._stream) {
this._stream.end();
this._stream.removeListener('error', this._stream._onError);
this._stream = null;
}
}