Warn about resource shortages (15min load average exceeding number of cores, available disk space for database and log file, and available shared memory (RAM))

Signed-off-by: DL6ER <dl6er@dl6er.de>
This commit is contained in:
DL6ER 2021-11-29 13:29:00 +01:00
parent 0d44449e7e
commit 4c2c59b214
No known key found for this signature in database
GPG Key ID: 00135ACBD90B28DD
7 changed files with 177 additions and 44 deletions

View File

@ -27,7 +27,7 @@
#include "../gc.h"
static const char *message_types[MAX_MESSAGE] =
{ "REGEX", "SUBNET", "HOSTNAME", "DNSMASQ_CONFIG", "RATE_LIMIT" , "DNSMASQ_WARN" };
{ "REGEX", "SUBNET", "HOSTNAME", "DNSMASQ_CONFIG", "RATE_LIMIT", "DNSMASQ_WARN", "LOAD", "SHMEM", "DISK" };
static unsigned char message_blob_types[MAX_MESSAGE][5] =
{
@ -59,7 +59,7 @@ static unsigned char message_blob_types[MAX_MESSAGE][5] =
SQLITE_NULL, // Not used
SQLITE_NULL // Not used
},
{ // RATE_LIMIT: The message column contains the IP address of the client in question
{ // RATE_LIMIT_MESSAGE: The message column contains the IP address of the client in question
SQLITE_INTEGER, // Configured maximum number of queries
SQLITE_INTEGER, // Configured rate-limiting interval [seconds]
SQLITE_NULL, // Not used
@ -73,6 +73,27 @@ static unsigned char message_blob_types[MAX_MESSAGE][5] =
SQLITE_NULL, // Not used
SQLITE_NULL // Not used
},
{ // LOAD_MESSAGE: The message column contains a general message
SQLITE_FLOAT, // 15min load average
SQLITE_INTEGER, // Number of cores
SQLITE_NULL, // Not used
SQLITE_NULL, // Not used
SQLITE_NULL // Not used
},
{ // SHMEM_MESSAGE: The message column contains the corresponding path
SQLITE_INTEGER, // Percentage currently used
SQLITE_TEXT, // Human-readable details about memory/disk usage
SQLITE_NULL, // Not used
SQLITE_NULL, // Not used
SQLITE_NULL // Not used
},
{ // DISK_MESSAGE: The message column contains the corresponding path
SQLITE_INTEGER, // Percentage currently used
SQLITE_TEXT, // Human-readable details about memory/disk usage
SQLITE_NULL, // Not used
SQLITE_NULL, // Not used
SQLITE_NULL // Not used
},
};
// Create message table in the database
bool create_message_table(sqlite3 *db)
@ -122,7 +143,7 @@ bool flush_message_table(void)
return true;
}
static bool add_message(enum message_type type,
static bool add_message(const enum message_type type, const bool unique,
const char *message, const int count,...)
{
bool okay = false;
@ -139,7 +160,7 @@ static bool add_message(enum message_type type,
}
// Ensure there are no duplicates when adding host name or rate-limiting messages
if(type == HOSTNAME_MESSAGE || type == RATE_LIMIT_MESSAGE)
if(unique)
{
sqlite3_stmt* stmt = NULL;
const char *querystr = "DELETE FROM message WHERE type = ?1 AND message = ?2";
@ -225,6 +246,10 @@ static bool add_message(enum message_type type,
rc = sqlite3_bind_int(stmt, 3 + j, va_arg(ap, int));
break;
case SQLITE_FLOAT:
rc = sqlite3_bind_double(stmt, 3 + j, va_arg(ap, double));
break;
case SQLITE_TEXT:
rc = sqlite3_bind_text(stmt, 3 + j, va_arg(ap, char*), -1, SQLITE_STATIC);
break;
@ -285,7 +310,7 @@ void logg_regex_warning(const char *type, const char *warning, const int dbindex
// Log to database only if not in CLI mode
if(!cli_mode)
add_message(REGEX_MESSAGE, warning, 3, type, regex, dbindex);
add_message(REGEX_MESSAGE, false, warning, 3, type, regex, dbindex);
}
void logg_subnet_warning(const char *ip, const int matching_count, const char *matching_ids,
@ -300,7 +325,7 @@ void logg_subnet_warning(const char *ip, const int matching_count, const char *m
// Log to database
char *names = get_client_names_from_ids(matching_ids);
add_message(SUBNET_MESSAGE, ip, 5, matching_count, names, matching_ids, chosen_match_text, chosen_match_id);
add_message(SUBNET_MESSAGE, false, ip, 5, matching_count, names, matching_ids, chosen_match_text, chosen_match_id);
free(names);
}
@ -311,7 +336,7 @@ void logg_hostname_warning(const char *ip, const char *name, const unsigned int
ip, name, pos);
// Log to database
add_message(HOSTNAME_MESSAGE, ip, 2, name, (const int)pos);
add_message(HOSTNAME_MESSAGE, true, ip, 2, name, (const int)pos);
}
void logg_fatal_dnsmasq_message(const char *message)
@ -320,7 +345,7 @@ void logg_fatal_dnsmasq_message(const char *message)
logg("FATAL ERROR in dnsmasq core: %s", message);
// Log to database
add_message(DNSMASQ_CONFIG_MESSAGE, message, 0);
add_message(DNSMASQ_CONFIG_MESSAGE, false, message, 0);
// FTL will die after this point, so we should make sure to clean up
// behind ourselves
@ -336,7 +361,7 @@ void logg_rate_limit_message(const char *clientIP, const unsigned int rate_limit
clientIP, turnaround, turnaround == 1 ? "" : "s");
// Log to database
add_message(RATE_LIMIT_MESSAGE, clientIP, 2, config.rate_limit.count, config.rate_limit.interval);
add_message(RATE_LIMIT_MESSAGE, true, clientIP, 2, config.rate_limit.count, config.rate_limit.interval);
}
void logg_warn_dnsmasq_message(char *message)
@ -345,5 +370,24 @@ void logg_warn_dnsmasq_message(char *message)
logg("WARNING in dnsmasq core: %s", message);
// Log to database
add_message(DNSMASQ_WARN_MESSAGE, message, 0);
add_message(DNSMASQ_WARN_MESSAGE, false, message, 0);
}
// log_resource_shortage() reports exactly one kind of resource shortage, both
// to the log (via logg()) and to the message table (via add_message() with
// the unique flag set).
//
// The first matching condition wins:
//   load  > 0.0 : load warning, uses load and nprocs
//   shmem > -1  : shared memory warning, uses path, shmem and msg
//   disk  > -1  : disk warning, uses path, disk and msg
// If none of the conditions match, nothing is logged.
void log_resource_shortage(const double load, const int nprocs, const int shmem, const int disk, const char *path, const char *msg)
{
	// Excessive system load
	if(load > 0.0)
	{
		logg("WARNING: Long-term load (15min avg) larger than number of processors: %.1f > %d", load, nprocs);
		add_message(LOAD_MESSAGE, true, "excessive load", 2, load, nprocs);
		return;
	}

	// Shared memory is running low
	if(shmem >= 0)
	{
		logg("WARNING: RAM shortage (%s) ahead: %d%% is used (%s)", path, shmem, msg);
		add_message(SHMEM_MESSAGE, true, path, 2, shmem, msg);
		return;
	}

	// Disk space is running low
	if(disk >= 0)
	{
		logg("WARNING: Disk shortage (%s) ahead: %d%% is used (%s)", path, disk, msg);
		add_message(DISK_MESSAGE, true, path, 2, disk, msg);
	}
}

View File

@ -22,5 +22,6 @@ void logg_hostname_warning(const char *ip, const char *name, const unsigned int
void logg_fatal_dnsmasq_message(const char *message);
void logg_rate_limit_message(const char *clientIP, const unsigned int rate_limit_count);
void logg_warn_dnsmasq_message(char *message);
void log_resource_shortage(const double load, const int nprocs, const int shmem, const int disk, const char *path, const char *msg);
#endif //MESSAGETABLE_H

View File

@ -201,6 +201,9 @@ enum message_type {
DNSMASQ_CONFIG_MESSAGE,
RATE_LIMIT_MESSAGE,
DNSMASQ_WARN_MESSAGE,
LOAD_MESSAGE,
SHMEM_MESSAGE,
DISK_MESSAGE,
MAX_MESSAGE
} __attribute__ ((packed));

View File

@ -22,6 +22,10 @@
#include <grp.h>
// NAME_MAX
#include <limits.h>
// statvfs()
#include <sys/statvfs.h>
// dirname()
#include <libgen.h>
// chmod_file() changes the file mode bits of a given file (relative
// to the directory file descriptor) according to mode. mode is an
@ -146,3 +150,56 @@ void ls_dir(const char* path)
// Close directory stream
closedir(dirp);
}
// get_path_usage() determines how full the filesystem containing path is.
//
// path:   any path located on the filesystem of interest
// buffer: receives a human-readable summary ("<path>: X used, Y total") or,
//         if statvfs() failed, the corresponding error string. At most 64
//         bytes (including the terminating zero) are written.
//
// Returns the used space in percent, or 0 if statvfs() failed.
int get_path_usage(const char *path, char buffer[64])
{
	// Get filesystem information about the filesystem holding this path
	struct statvfs f;
	if(statvfs(path, &f) != 0)
	{
		// If statvfs() failed, we return the error instead
		snprintf(buffer, 64, "%s", strerror(errno));
		return 0;
	}

	// Explicitly cast the block counts to unsigned long long to avoid
	// overflowing with drives larger than 4 GB on 32bit systems.
	// Both f_blocks and f_bavail are counted in units of f_frsize (the
	// fragment size), not f_bsize, so the same factor is used for both.
	const unsigned long long size = (unsigned long long)f.f_blocks * f.f_frsize;
	const unsigned long long avail = (unsigned long long)f.f_bavail * f.f_frsize;
	// Clamp to zero so an inconsistent report can never wrap around
	const unsigned long long used = avail < size ? size - avail : 0;

	// Create human-readable total size
	char prefix_size[2] = { 0 };
	double formated_size = 0.0;
	format_memory_size(prefix_size, size, &formated_size);

	// Generate human-readable "total used" size
	char prefix_used[2] = { 0 };
	double formated_used = 0.0;
	format_memory_size(prefix_used, used, &formated_used);

	// Print result into buffer passed to this subroutine
	snprintf(buffer, 64, "%s: %.1f%sB used, %.1f%sB total", path,
	         formated_used, prefix_used, formated_size, prefix_size);

	// Return percentage of used space
	// Adding 1 avoids FPE if the size turns out to be zero
	return (int)((used*100)/(size + 1));
}
// get_filepath_usage() determines how full the filesystem is that holds the
// directory containing the given file.
//
// file:   path of a file; NULL or an empty string is rejected
// buffer: passed through to get_path_usage(), which fills it
//
// Returns -1 if file is NULL or empty, otherwise whatever get_path_usage()
// returns for the directory of file.
int get_filepath_usage(const char *file, char buffer[64])
{
	if(file == NULL || strlen(file) == 0)
		return -1;

	// Get path from file, we duplicate the string
	// here as dirname() may modify the string inplace
	char path[PATH_MAX] = { 0 };
	strncpy(path, file, sizeof(path)-1);
	path[sizeof(path)-1] = '\0';

	// dirname() is allowed to return a pointer to static storage (e.g. "."
	// for a file name without any slash) instead of modifying its argument,
	// so we always have to use its return value
	const char *dir = dirname(path);

	// Get percentage of disk usage at this path
	return get_path_usage(dir, buffer);
}

View File

@ -14,5 +14,7 @@ bool chmod_file(const char *filename, const mode_t mode);
bool file_exists(const char *filename);
unsigned long long get_FTL_db_filesize(void);
void ls_dir(const char* path);
int get_path_usage(const char *path, char buffer[64]);
int get_filepath_usage(const char *file, char buffer[64]);
#endif //FILE_H

View File

@ -22,6 +22,18 @@
#include "datastructure.h"
// logg_rate_limit_message()
#include "database/message-table.h"
// get_nprocs()
#include <sys/sysinfo.h>
// get_filepath_usage()
#include "files.h"
// Warn if space in a storage place exceeds this threshold
// default: 90%
#define WARN_LIMIT 90
// Resource checking interval
// default: 300 seconds
#define RCinterval 300
bool doGC = false;
@ -67,6 +79,32 @@ time_t get_rate_limit_turnaround(const unsigned int rate_limit_count)
return (time_t)config.rate_limit.interval*how_often - (time(NULL) - lastRateLimitCleaner);
}
// check_space() issues a resource shortage warning when the usage reported by
// get_filepath_usage() for the given file exceeds WARN_LIMIT (in percent).
static void check_space(const char *file)
{
	// Receives the human-readable usage details
	char details[64] = { 0 };

	// Query usage of the device holding the corresponding file
	const int used_percent = get_filepath_usage(file, details);

	// Nothing to do as long as we are within the configured threshold
	if(used_percent <= WARN_LIMIT)
		return;

	log_resource_shortage(-1.0, 0, -1, used_percent, file, details);
}
// check_load() issues a resource shortage warning when the 15 minute load
// average exceeds the number of processors reported by get_nprocs().
static void check_load(void)
{
	// Get CPU load averages (1, 5 and 15 minutes)
	double load[3] = { 0.0, 0.0, 0.0 };

	// getloadavg() returns the number of samples actually retrieved. This
	// may be -1 on failure but also less than requested, in which case the
	// 15 minute average in load[2] has not been filled in.
	if(getloadavg(load, 3) < 3)
		return;

	// Get number of CPU cores
	const int nprocs = get_nprocs();

	// Warn if 15 minute average of load exceeds number of available
	// processors
	if(load[2] > nprocs)
		log_resource_shortage(load[2], nprocs, -1, -1, NULL, NULL);
}
void *GC_thread(void *val)
{
// Set thread name
@ -76,6 +114,7 @@ void *GC_thread(void *val)
// Remember when we last ran the actions
time_t lastGCrun = time(NULL) - time(NULL)%GCinterval;
lastRateLimitCleaner = time(NULL);
time_t lastResourceCheck = 0;
// Run as long as this thread is not canceled
while(!killed)
@ -93,6 +132,15 @@ void *GC_thread(void *val)
if(killed)
break;
// Check available resources
if(now - lastResourceCheck >= RCinterval)
{
check_load();
check_space(FTLfiles.FTL_db);
check_space(FTLfiles.log);
lastResourceCheck = now;
}
if(now - GCdelay - lastGCrun >= GCinterval || doGC)
{
doGC = false;

View File

@ -16,8 +16,6 @@
#include "config.h"
// data getter functions
#include "datastructure.h"
// statvfs()
#include <sys/statvfs.h>
// get_num_regex()
#include "regex_r.h"
// NAME_MAX
@ -26,6 +24,10 @@
#include "daemon.h"
// generate_backtrace()
#include "signals.h"
// get_path_usage()
#include "files.h"
// log_resource_shortage()
#include "database/message-table.h"
/// The version of shared memory used
#define SHARED_MEMORY_VERSION 14
@ -114,31 +116,8 @@ static void *enlarge_shmem_struct(const char type);
static int get_dev_shm_usage(char buffer[64])
{
// Get filesystem information about /dev/shm (typically a tmpfs)
struct statvfs f;
if(statvfs(SHMEM_PATH, &f) != 0)
{
// If statvfs() failed, we return the error instead
strncpy(buffer, strerror(errno), 64);
buffer[63] = '\0';
return 0;
}
// Explicitly cast the block counts to unsigned long long to avoid
// overflowing with drives larger than 4 GB on 32bit systems
const unsigned long long size = (unsigned long long)f.f_blocks * f.f_frsize;
const unsigned long long free = (unsigned long long)f.f_bavail * f.f_bsize;
const unsigned long long used = size - free;
// Create human-readable total size
char prefix_size[2] = { 0 };
double formated_size = 0.0;
format_memory_size(prefix_size, size, &formated_size);
// Generate human-readable "total used" size
char prefix_used[2] = { 0 };
double formated_used = 0.0;
format_memory_size(prefix_used, used, &formated_used);
char buffer2[64] = { 0 };
const int percentage = get_path_usage(SHMEM_PATH, buffer2);
// Generate human-readable "used by FTL" size
char prefix_FTL[2] = { 0 };
@ -146,12 +125,11 @@ static int get_dev_shm_usage(char buffer[64])
format_memory_size(prefix_FTL, used_shmem, &formated_FTL);
// Print result into buffer passed to this subroutine
snprintf(buffer, 64, SHMEM_PATH": %.1f%sB used, %.1f%sB total, FTL uses %.1f%sB",
formated_used, prefix_used, formated_size, prefix_size, formated_FTL, prefix_FTL);
snprintf(buffer, 64, "%s, FTL uses %.1f%sB",
buffer2, formated_FTL, prefix_FTL);
// Return percentage of used shared memory
// Adding 1 avoids FPE if the size turns out to be zero
return (used*100)/(size + 1);
// Return percentage
return percentage;
}
// chown_shmem() changes the file ownership of a given shared memory object
@ -626,7 +604,7 @@ static SharedMemory create_shm(const char *name, const size_t size, bool create_
logg("Creating shared memory with name \"%s\" and size %zu (%s)", name, size, df);
}
if(percentage > SHMEM_WARN_LIMIT)
logg("WARNING: More than %u%% of "SHMEM_PATH" is used", SHMEM_WARN_LIMIT);
log_resource_shortage(-1.0, 0, percentage, -1, SHMEM_PATH, df);
SharedMemory sharedMemory = {
.name = name,
@ -765,7 +743,7 @@ static bool realloc_shm(SharedMemory *sharedMemory, const size_t size1, const si
sharedMemory->name, sharedMemory->size, size1, size2, size);
if(percentage > SHMEM_WARN_LIMIT)
logg("WARNING: More than %u%% of "SHMEM_PATH" is used", SHMEM_WARN_LIMIT);
log_resource_shortage(-1.0, 0, percentage, -1, SHMEM_PATH, df);
// Resize shared memory object if requested
// If not, we only remap a shared memory object which might have changed