Warn about resource shortages (15min load average exceeding number of cores, available disk space for database and log file, and available shared memory (RAM))
Signed-off-by: DL6ER <dl6er@dl6er.de>
This commit is contained in:
parent
0d44449e7e
commit
4c2c59b214
|
@ -27,7 +27,7 @@
|
|||
#include "../gc.h"
|
||||
|
||||
static const char *message_types[MAX_MESSAGE] =
|
||||
{ "REGEX", "SUBNET", "HOSTNAME", "DNSMASQ_CONFIG", "RATE_LIMIT" , "DNSMASQ_WARN" };
|
||||
{ "REGEX", "SUBNET", "HOSTNAME", "DNSMASQ_CONFIG", "RATE_LIMIT", "DNSMASQ_WARN", "LOAD", "SHMEM", "DISK" };
|
||||
|
||||
static unsigned char message_blob_types[MAX_MESSAGE][5] =
|
||||
{
|
||||
|
@ -59,7 +59,7 @@ static unsigned char message_blob_types[MAX_MESSAGE][5] =
|
|||
SQLITE_NULL, // Not used
|
||||
SQLITE_NULL // Not used
|
||||
},
|
||||
{ // RATE_LIMIT: The message column contains the IP address of the client in question
|
||||
{ // RATE_LIMIT_MESSAGE: The message column contains the IP address of the client in question
|
||||
SQLITE_INTEGER, // Configured maximum number of queries
|
||||
SQLITE_INTEGER, // Configured rate-limiting interval [seconds]
|
||||
SQLITE_NULL, // Not used
|
||||
|
@ -73,6 +73,27 @@ static unsigned char message_blob_types[MAX_MESSAGE][5] =
|
|||
SQLITE_NULL, // Not used
|
||||
SQLITE_NULL // Not used
|
||||
},
|
||||
{ // LOAD_MESSAGE: The message column contains a general message
|
||||
SQLITE_FLOAT, // 15min load average
|
||||
SQLITE_INTEGER, // Number of cores
|
||||
SQLITE_NULL, // Not used
|
||||
SQLITE_NULL, // Not used
|
||||
SQLITE_NULL // Not used
|
||||
},
|
||||
{ // SHMEM_MESSAGE: The message column contains the corresponding path
|
||||
SQLITE_INTEGER, // Percentage currently used
|
||||
SQLITE_TEXT, // Human-readable details about memory/disk usage
|
||||
SQLITE_NULL, // Not used
|
||||
SQLITE_NULL, // Not used
|
||||
SQLITE_NULL // Not used
|
||||
},
|
||||
{ // DISK_MESSAGE: The message column contains the corresponding path
|
||||
SQLITE_INTEGER, // Percentage currently used
|
||||
SQLITE_TEXT, // Human-readable details about memory/disk usage
|
||||
SQLITE_NULL, // Not used
|
||||
SQLITE_NULL, // Not used
|
||||
SQLITE_NULL // Not used
|
||||
},
|
||||
};
|
||||
// Create message table in the database
|
||||
bool create_message_table(sqlite3 *db)
|
||||
|
@ -122,7 +143,7 @@ bool flush_message_table(void)
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool add_message(enum message_type type,
|
||||
static bool add_message(const enum message_type type, const bool unique,
|
||||
const char *message, const int count,...)
|
||||
{
|
||||
bool okay = false;
|
||||
|
@ -139,7 +160,7 @@ static bool add_message(enum message_type type,
|
|||
}
|
||||
|
||||
// Ensure there are no duplicates when the caller requested unique messages
|
||||
if(type == HOSTNAME_MESSAGE || type == RATE_LIMIT_MESSAGE)
|
||||
if(unique)
|
||||
{
|
||||
sqlite3_stmt* stmt = NULL;
|
||||
const char *querystr = "DELETE FROM message WHERE type = ?1 AND message = ?2";
|
||||
|
@ -225,6 +246,10 @@ static bool add_message(enum message_type type,
|
|||
rc = sqlite3_bind_int(stmt, 3 + j, va_arg(ap, int));
|
||||
break;
|
||||
|
||||
case SQLITE_FLOAT:
|
||||
rc = sqlite3_bind_double(stmt, 3 + j, va_arg(ap, double));
|
||||
break;
|
||||
|
||||
case SQLITE_TEXT:
|
||||
rc = sqlite3_bind_text(stmt, 3 + j, va_arg(ap, char*), -1, SQLITE_STATIC);
|
||||
break;
|
||||
|
@ -285,7 +310,7 @@ void logg_regex_warning(const char *type, const char *warning, const int dbindex
|
|||
|
||||
// Log to database only if not in CLI mode
|
||||
if(!cli_mode)
|
||||
add_message(REGEX_MESSAGE, warning, 3, type, regex, dbindex);
|
||||
add_message(REGEX_MESSAGE, false, warning, 3, type, regex, dbindex);
|
||||
}
|
||||
|
||||
void logg_subnet_warning(const char *ip, const int matching_count, const char *matching_ids,
|
||||
|
@ -300,7 +325,7 @@ void logg_subnet_warning(const char *ip, const int matching_count, const char *m
|
|||
|
||||
// Log to database
|
||||
char *names = get_client_names_from_ids(matching_ids);
|
||||
add_message(SUBNET_MESSAGE, ip, 5, matching_count, names, matching_ids, chosen_match_text, chosen_match_id);
|
||||
add_message(SUBNET_MESSAGE, false, ip, 5, matching_count, names, matching_ids, chosen_match_text, chosen_match_id);
|
||||
free(names);
|
||||
}
|
||||
|
||||
|
@ -311,7 +336,7 @@ void logg_hostname_warning(const char *ip, const char *name, const unsigned int
|
|||
ip, name, pos);
|
||||
|
||||
// Log to database
|
||||
add_message(HOSTNAME_MESSAGE, ip, 2, name, (const int)pos);
|
||||
add_message(HOSTNAME_MESSAGE, true, ip, 2, name, (const int)pos);
|
||||
}
|
||||
|
||||
void logg_fatal_dnsmasq_message(const char *message)
|
||||
|
@ -320,7 +345,7 @@ void logg_fatal_dnsmasq_message(const char *message)
|
|||
logg("FATAL ERROR in dnsmasq core: %s", message);
|
||||
|
||||
// Log to database
|
||||
add_message(DNSMASQ_CONFIG_MESSAGE, message, 0);
|
||||
add_message(DNSMASQ_CONFIG_MESSAGE, false, message, 0);
|
||||
|
||||
// FTL will die after this point, so we should make sure to clean up
|
||||
// behind ourselves
|
||||
|
@ -336,7 +361,7 @@ void logg_rate_limit_message(const char *clientIP, const unsigned int rate_limit
|
|||
clientIP, turnaround, turnaround == 1 ? "" : "s");
|
||||
|
||||
// Log to database
|
||||
add_message(RATE_LIMIT_MESSAGE, clientIP, 2, config.rate_limit.count, config.rate_limit.interval);
|
||||
add_message(RATE_LIMIT_MESSAGE, true, clientIP, 2, config.rate_limit.count, config.rate_limit.interval);
|
||||
}
|
||||
|
||||
void logg_warn_dnsmasq_message(char *message)
|
||||
|
@ -345,5 +370,24 @@ void logg_warn_dnsmasq_message(char *message)
|
|||
logg("WARNING in dnsmasq core: %s", message);
|
||||
|
||||
// Log to database
|
||||
add_message(DNSMASQ_WARN_MESSAGE, message, 0);
|
||||
add_message(DNSMASQ_WARN_MESSAGE, false, message, 0);
|
||||
}
|
||||
|
||||
void log_resource_shortage(const double load, const int nprocs, const int shmem, const int disk, const char *path, const char *msg)
|
||||
{
|
||||
if(load > 0.0)
|
||||
{
|
||||
logg("WARNING: Long-term load (15min avg) larger than number of processors: %.1f > %d", load, nprocs);
|
||||
add_message(LOAD_MESSAGE, true, "excessive load", 2, load, nprocs);
|
||||
}
|
||||
else if(shmem > -1)
|
||||
{
|
||||
logg("WARNING: RAM shortage (%s) ahead: %d%% is used (%s)", path, shmem, msg);
|
||||
add_message(SHMEM_MESSAGE, true, path, 2, shmem, msg);
|
||||
}
|
||||
else if(disk > -1)
|
||||
{
|
||||
logg("WARNING: Disk shortage (%s) ahead: %d%% is used (%s)", path, disk, msg);
|
||||
add_message(DISK_MESSAGE, true, path, 2, disk, msg);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,5 +22,6 @@ void logg_hostname_warning(const char *ip, const char *name, const unsigned int
|
|||
void logg_fatal_dnsmasq_message(const char *message);
|
||||
void logg_rate_limit_message(const char *clientIP, const unsigned int rate_limit_count);
|
||||
void logg_warn_dnsmasq_message(char *message);
|
||||
void log_resource_shortage(const double load, const int nprocs, const int shmem, const int disk, const char *path, const char *msg);
|
||||
|
||||
#endif //MESSAGETABLE_H
|
||||
|
|
|
@ -201,6 +201,9 @@ enum message_type {
|
|||
DNSMASQ_CONFIG_MESSAGE,
|
||||
RATE_LIMIT_MESSAGE,
|
||||
DNSMASQ_WARN_MESSAGE,
|
||||
LOAD_MESSAGE,
|
||||
SHMEM_MESSAGE,
|
||||
DISK_MESSAGE,
|
||||
MAX_MESSAGE
|
||||
} __attribute__ ((packed));
|
||||
|
||||
|
|
57
src/files.c
57
src/files.c
|
@ -22,6 +22,10 @@
|
|||
#include <grp.h>
|
||||
// NAME_MAX
|
||||
#include <limits.h>
|
||||
// statvfs()
|
||||
#include <sys/statvfs.h>
|
||||
// dirname()
|
||||
#include <libgen.h>
|
||||
|
||||
// chmod_file() changes the file mode bits of a given file (relative
|
||||
// to the directory file descriptor) according to mode. mode is an
|
||||
|
@ -146,3 +150,56 @@ void ls_dir(const char* path)
|
|||
// Close directory stream
|
||||
closedir(dirp);
|
||||
}
|
||||
|
||||
// Compute how full the filesystem containing path is.
//
// path:   any path on the filesystem of interest (passed to statvfs())
// buffer: receives a human-readable summary ("<path>: <used> used,
//         <total> total"), truncated to 63 characters plus terminator.
//         If statvfs() fails, it receives the error description instead.
//
// Returns the percentage of used space (rounded down), or 0 if statvfs()
// failed.
int get_path_usage(const char *path, char buffer[64])
{
	// Get filesystem information about the given path
	struct statvfs f;
	if(statvfs(path, &f) != 0)
	{
		// If statvfs() failed, we return the error instead
		snprintf(buffer, 64, "%s", strerror(errno));
		return 0;
	}

	// Explicitly cast the block counts to unsigned long long to avoid
	// overflowing with drives larger than 4 GB on 32bit systems.
	// Both counts are scaled with f_frsize so they share the same unit.
	const unsigned long long size = (unsigned long long)f.f_blocks * f.f_frsize;
	const unsigned long long avail = (unsigned long long)f.f_bavail * f.f_frsize;
	// Guard against unsigned wrap-around should the filesystem ever
	// report more available than total space
	const unsigned long long used = size > avail ? size - avail : 0;

	// Create human-readable total size
	char prefix_size[2] = { 0 };
	double formated_size = 0.0;
	format_memory_size(prefix_size, size, &formated_size);

	// Generate human-readable "total used" size
	char prefix_used[2] = { 0 };
	double formated_used = 0.0;
	format_memory_size(prefix_used, used, &formated_used);

	// Print result into buffer passed to this subroutine
	snprintf(buffer, 64, "%s: %.1f%sB used, %.1f%sB total", path,
	         formated_used, prefix_used, formated_size, prefix_size);

	// Return percentage of used space at this path
	// Adding 1 avoids FPE if the size turns out to be zero
	return (int)((used*100)/(size + 1));
}
|
||||
|
||||
int get_filepath_usage(const char *file, char buffer[64])
|
||||
{
|
||||
if(file == NULL || strlen(file) == 0)
|
||||
return -1;
|
||||
|
||||
// Get path from file, we duplicate the string
|
||||
// here as dirname() modifies the string inplace
|
||||
char path[PATH_MAX] = { 0 };
|
||||
strncpy(path, file, sizeof(path)-1);
|
||||
path[sizeof(path)-1] = '\0';
|
||||
dirname(path);
|
||||
|
||||
// Get percentage of disk usage at this path
|
||||
return get_path_usage(path, buffer);
|
||||
}
|
||||
|
|
|
@ -14,5 +14,7 @@ bool chmod_file(const char *filename, const mode_t mode);
|
|||
bool file_exists(const char *filename);
|
||||
unsigned long long get_FTL_db_filesize(void);
|
||||
void ls_dir(const char* path);
|
||||
int get_path_usage(const char *path, char buffer[64]);
|
||||
int get_filepath_usage(const char *file, char buffer[64]);
|
||||
|
||||
#endif //FILE_H
|
||||
|
|
48
src/gc.c
48
src/gc.c
|
@ -22,6 +22,18 @@
|
|||
#include "datastructure.h"
|
||||
// logg_rate_limit_message()
|
||||
#include "database/message-table.h"
|
||||
// get_nprocs()
|
||||
#include <sys/sysinfo.h>
|
||||
// get_filepath_usage()
|
||||
#include "files.h"
|
||||
|
||||
// Warn if space in a storage place exceeds this threshold
|
||||
// default: 90%
|
||||
#define WARN_LIMIT 90
|
||||
|
||||
// Resource checking interval
|
||||
// default: 300 seconds
|
||||
#define RCinterval 300
|
||||
|
||||
bool doGC = false;
|
||||
|
||||
|
@ -67,6 +79,32 @@ time_t get_rate_limit_turnaround(const unsigned int rate_limit_count)
|
|||
return (time_t)config.rate_limit.interval*how_often - (time(NULL) - lastRateLimitCleaner);
|
||||
}
|
||||
|
||||
static void check_space(const char *file)
|
||||
{
|
||||
int perc = 0;
|
||||
char buffer[64] = { 0 };
|
||||
// Warn if space usage at the device holding the corresponding file
|
||||
// exceeds the configured threshold
|
||||
if((perc = get_filepath_usage(file, buffer)) > WARN_LIMIT)
|
||||
log_resource_shortage(-1.0, 0, -1, perc, file, buffer);
|
||||
}
|
||||
|
||||
// Warn if the 15 minute load average exceeds the number of available
// processors.
static void check_load(void)
{
	// Get CPU load averages. getloadavg() returns the number of samples
	// it actually retrieved (or -1 on error). We need all three because
	// the 15 minute average is the last one, so bail out on anything
	// less to avoid reading an uninitialized value.
	double load[3] = { 0.0, 0.0, 0.0 };
	if(getloadavg(load, 3) < 3)
		return;

	// Get number of CPU cores
	const int nprocs = get_nprocs();

	// Warn if 15 minute average of load exceeds number of available
	// processors
	if(load[2] > nprocs)
		log_resource_shortage(load[2], nprocs, -1, -1, NULL, NULL);
}
|
||||
|
||||
void *GC_thread(void *val)
|
||||
{
|
||||
// Set thread name
|
||||
|
@ -76,6 +114,7 @@ void *GC_thread(void *val)
|
|||
// Remember when we last ran the actions
|
||||
time_t lastGCrun = time(NULL) - time(NULL)%GCinterval;
|
||||
lastRateLimitCleaner = time(NULL);
|
||||
time_t lastResourceCheck = 0;
|
||||
|
||||
// Run as long as this thread is not canceled
|
||||
while(!killed)
|
||||
|
@ -93,6 +132,15 @@ void *GC_thread(void *val)
|
|||
if(killed)
|
||||
break;
|
||||
|
||||
// Check available resources
|
||||
if(now - lastResourceCheck >= RCinterval)
|
||||
{
|
||||
check_load();
|
||||
check_space(FTLfiles.FTL_db);
|
||||
check_space(FTLfiles.log);
|
||||
lastResourceCheck = now;
|
||||
}
|
||||
|
||||
if(now - GCdelay - lastGCrun >= GCinterval || doGC)
|
||||
{
|
||||
doGC = false;
|
||||
|
|
46
src/shmem.c
46
src/shmem.c
|
@ -16,8 +16,6 @@
|
|||
#include "config.h"
|
||||
// data getter functions
|
||||
#include "datastructure.h"
|
||||
// statvfs()
|
||||
#include <sys/statvfs.h>
|
||||
// get_num_regex()
|
||||
#include "regex_r.h"
|
||||
// NAME_MAX
|
||||
|
@ -26,6 +24,10 @@
|
|||
#include "daemon.h"
|
||||
// generate_backtrace()
|
||||
#include "signals.h"
|
||||
// get_path_usage()
|
||||
#include "files.h"
|
||||
// log_resource_shortage()
|
||||
#include "database/message-table.h"
|
||||
|
||||
/// The version of shared memory used
|
||||
#define SHARED_MEMORY_VERSION 14
|
||||
|
@ -114,31 +116,8 @@ static void *enlarge_shmem_struct(const char type);
|
|||
|
||||
static int get_dev_shm_usage(char buffer[64])
|
||||
{
|
||||
// Get filesystem information about /dev/shm (typically a tmpfs)
|
||||
struct statvfs f;
|
||||
if(statvfs(SHMEM_PATH, &f) != 0)
|
||||
{
|
||||
// If statvfs() failed, we return the error instead
|
||||
strncpy(buffer, strerror(errno), 64);
|
||||
buffer[63] = '\0';
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Explicitly cast the block counts to unsigned long long to avoid
|
||||
// overflowing with drives larger than 4 GB on 32bit systems
|
||||
const unsigned long long size = (unsigned long long)f.f_blocks * f.f_frsize;
|
||||
const unsigned long long free = (unsigned long long)f.f_bavail * f.f_bsize;
|
||||
const unsigned long long used = size - free;
|
||||
|
||||
// Create human-readable total size
|
||||
char prefix_size[2] = { 0 };
|
||||
double formated_size = 0.0;
|
||||
format_memory_size(prefix_size, size, &formated_size);
|
||||
|
||||
// Generate human-readable "total used" size
|
||||
char prefix_used[2] = { 0 };
|
||||
double formated_used = 0.0;
|
||||
format_memory_size(prefix_used, used, &formated_used);
|
||||
char buffer2[64] = { 0 };
|
||||
const int percentage = get_path_usage(SHMEM_PATH, buffer2);
|
||||
|
||||
// Generate human-readable "used by FTL" size
|
||||
char prefix_FTL[2] = { 0 };
|
||||
|
@ -146,12 +125,11 @@ static int get_dev_shm_usage(char buffer[64])
|
|||
format_memory_size(prefix_FTL, used_shmem, &formated_FTL);
|
||||
|
||||
// Print result into buffer passed to this subroutine
|
||||
snprintf(buffer, 64, SHMEM_PATH": %.1f%sB used, %.1f%sB total, FTL uses %.1f%sB",
|
||||
formated_used, prefix_used, formated_size, prefix_size, formated_FTL, prefix_FTL);
|
||||
snprintf(buffer, 64, "%s, FTL uses %.1f%sB",
|
||||
buffer2, formated_FTL, prefix_FTL);
|
||||
|
||||
// Return percentage of used shared memory
|
||||
// Adding 1 avoids FPE if the size turns out to be zero
|
||||
return (used*100)/(size + 1);
|
||||
// Return percentage
|
||||
return percentage;
|
||||
}
|
||||
|
||||
// chown_shmem() changes the file ownership of a given shared memory object
|
||||
|
@ -626,7 +604,7 @@ static SharedMemory create_shm(const char *name, const size_t size, bool create_
|
|||
logg("Creating shared memory with name \"%s\" and size %zu (%s)", name, size, df);
|
||||
}
|
||||
if(percentage > SHMEM_WARN_LIMIT)
|
||||
logg("WARNING: More than %u%% of "SHMEM_PATH" is used", SHMEM_WARN_LIMIT);
|
||||
log_resource_shortage(-1.0, 0, percentage, -1, SHMEM_PATH, df);
|
||||
|
||||
SharedMemory sharedMemory = {
|
||||
.name = name,
|
||||
|
@ -765,7 +743,7 @@ static bool realloc_shm(SharedMemory *sharedMemory, const size_t size1, const si
|
|||
sharedMemory->name, sharedMemory->size, size1, size2, size);
|
||||
|
||||
if(percentage > SHMEM_WARN_LIMIT)
|
||||
logg("WARNING: More than %u%% of "SHMEM_PATH" is used", SHMEM_WARN_LIMIT);
|
||||
log_resource_shortage(-1.0, 0, percentage, -1, SHMEM_PATH, df);
|
||||
|
||||
// Resize shared memory object if requested
|
||||
// If not, we only remap a shared memory object which might have changed
|
||||
|
|
Loading…
Reference in New Issue