Expose GZIP compressor via CLI

Signed-off-by: DL6ER <dl6er@dl6er.de>
This commit is contained in:
DL6ER 2023-01-21 19:43:22 +01:00
parent 4026d5f7a2
commit acf187d8b3
No known key found for this signature in database
GPG Key ID: 00135ACBD90B28DD
5 changed files with 230 additions and 148 deletions

View File

@ -42,6 +42,8 @@
#include "ph7/ph7.h"
#include "config/cli.h"
#include "config/config.h"
// compression functions
#include "miniz/compression.h"
// defined in dnsmasq.c
extern void print_dnsmasq_version(const char *yellow, const char *green, const char *bold, const char *normal);
@ -172,6 +174,27 @@ void parse_args(int argc, char* argv[])
(argc > 1 && strEndsWith(argv[1], ".db")))
exit(sqlite3_shell_main(argc, argv));
// Compression feature
if((argc == 3 || argc == 4) && strcmp(argv[1], "--compress") == 0)
{
const char *infile = argv[2];
char *outfile = NULL;
if(argc == 4)
outfile = argv[3];
else
{
// If no output file is given, we use the input file name with ".gz" appended
outfile = calloc(strlen(infile)+4, sizeof(char));
strcpy(outfile, infile);
strcat(outfile, ".gz");
}
// Enable stdout printing
cli_mode = true;
log_ctrl(false, true);
// Compress file
exit(compress_file(infile, outfile, true) ? EXIT_SUCCESS : EXIT_FAILURE);
}
// Set config option through CLI
if(argc > 1 && strcmp(argv[1], "--config") == 0)
{
@ -606,7 +629,15 @@ void parse_args(int argc, char* argv[])
printf("%sConfig options:%s\n", yellow, normal);
printf("\t%s--config %skey%s Get current value of config item %skey%s\n", green, blue, normal, blue, normal);
printf("\t%s--config %skey %svalue%s Set new %svalue%s of config item %skey%s\n", green, blue, cyan, normal, cyan, normal, blue, normal);
printf("\t%s--config %skey %svalue%s Set new %svalue%s of config item %skey%s\n\n", green, blue, cyan, normal, cyan, normal, blue, normal);
printf("%sEmbedded GZIP compressor:%s\n", yellow, normal);
printf(" A simple but fast gzip-compatible compressor optimized\n");
printf(" for processing small files (< 100 MB).\n");
printf(" Usage: %spihole-FTL --compress %sinfile %s[outfile]%s\n\n", green, cyan, purple, normal);
printf(" - %sinfile%s is the file to be compressed.\n", cyan, normal);
printf(" - %s[outfile]%s is the optional target. If omitted, FTL will\n", purple, normal);
printf(" use the %sinfile%s and append %s.gz%s at the end.\n\n", cyan, normal, cyan, normal);
printf("%sOther:%s\n", yellow, normal);
printf("\t%sdhcp-discover%s Discover DHCP servers in the local\n", green, normal);

View File

@ -27,9 +27,7 @@
// dirname()
#include <libgen.h>
// compression functions
#include "miniz/miniz.h"
static bool compress_file(const char *in, const char* out);
#include "miniz/compression.h"
// chmod_file() changes the file mode bits of a given file (relative
// to the directory file descriptor) according to mode. mode is an
@ -332,7 +330,7 @@ void rotate_files(const char *path)
else
log_debug(DEBUG_CONFIG, "Compressing %s.{%u -> %u.gz}",
path, i-1, i);
if(compress_file(new_path, new_path_compressed))
if(compress_file(new_path, new_path_compressed, false))
{
// On success, we remove the uncompressed file
remove(new_path);
@ -365,149 +363,6 @@ void rotate_files(const char *path)
}
}
static bool compress_file(const char *in, const char *out)
{
// Read entire file into memory
FILE *fp = fopen(in, "rb");
if(fp == NULL)
{
log_warn("compress_file(): failed to open %s: %s (%d)", in, strerror(errno), errno);
return false;
}
// Get file size
fseek(fp, 0, SEEK_END);
const mz_ulong size = ftell(fp);
fseek(fp, 0, SEEK_SET);
// Read file into memory
unsigned char *buffer = malloc(size);
if(buffer == NULL)
{
log_warn("compress_file(): failed to allocate %lu bytes of memory", (unsigned long)size);
fclose(fp);
return false;
}
if(fread(buffer, 1, size, fp) != size)
{
log_warn("compress_file(): failed to read %lu bytes from %s", (unsigned long)size, in);
fclose(fp);
free(buffer);
return false;
}
fclose(fp);
// Allocate memory for compressed file
// (compressBound() returns the maximum size of the compressed data)
mz_ulong size_compressed = compressBound(size);
unsigned char *buffer_compressed = malloc(size_compressed);
if(buffer_compressed == NULL)
{
log_warn("compress_file(): failed to allocate %lu bytes of memory", (unsigned long)size_compressed);
free(buffer);
return false;
}
// Compress file (ZLIB stream format - not GZIP! - see https://tools.ietf.org/html/rfc1950)
int ret = compress2(buffer_compressed, &size_compressed, buffer, size, Z_BEST_COMPRESSION);
if(ret != Z_OK)
{
log_warn("compress_file(): failed to compress %s: %s (%d)", in, zError(ret), ret);
free(buffer);
free(buffer_compressed);
return false;
}
// Create compressed file
fp = fopen(out, "wb");
if(fp == NULL)
{
log_warn("compress_file(): failed to open %s: %s (%d)", out, strerror(errno), errno);
free(buffer);
free(buffer_compressed);
return false;
}
// Generate GZIP header (without timestamp and extra flags)
// (see https://tools.ietf.org/html/rfc1952#section-2.3)
//
// 0 1 2 3 4 5 6 7 8 9
// +---+---+---+---+---+---+---+---+---+---+
// |ID1|ID2|CM |FLG| MTIME |XFL|OS | (more-->)
// +---+---+---+---+---+---+---+---+---+---+
//
// 1F8B: magic number
// 08: compression method (deflate)
// 01: flags (FTEXT is set)
// 00000000: timestamp (set later). For simplicity, we set it to the current time
// 02: extra flags (maximum compression)
// 03: operating system (Unix)
const unsigned char gzip_header[] = { 0x1F, 0x8B, 0x08, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x03 };
// Set timestamp
uint32_t now = htole32(time(NULL));
memcpy((void*)(gzip_header+4), &now, sizeof(now));
// Write header
if(fwrite(gzip_header, 1, sizeof(gzip_header), fp) != sizeof(gzip_header))
{
log_warn("compress_file(): failed to write GZIP header to %s", out);
fclose(fp);
free(buffer);
free(buffer_compressed);
return false;
}
// Write compressed data, strip ZLIB header (first two bytes) and footer (last four bytes)
// +=======================+
// |...compressed blocks...| (more-->)
// +=======================+
if(fwrite(buffer_compressed + 2, 1, size_compressed - (2 + 4), fp) != size_compressed-6)
{
log_warn("compress_file(): failed to write %lu bytes to %s", (unsigned long)size_compressed, out);
fclose(fp);
free(buffer);
free(buffer_compressed);
return false;
}
// Write GZIP footer (CRC32 and uncompressed size)
// (see https://tools.ietf.org/html/rfc1952#section-2.3)
//
// 0 1 2 3 4 5 6 7
// +---+---+---+---+---+---+---+---+
// | CRC32 | ISIZE |
// +---+---+---+---+---+---+---+---+
//
// CRC32: This contains a Cyclic Redundancy Check value of the
// uncompressed data computed according to CRC-32 algorithm used in
// the ISO 3309 standard and in section 8.1.1.6.2 of ITU-T
// recommendation V.42. (See http://www.iso.ch for ordering ISO
// documents. See gopher://info.itu.ch for an online version of
// ITU-T V.42.)
// isize: This contains the size of the original (uncompressed) input
// data modulo 2^32 (little endian).
uint32_t crc = mz_crc32(MZ_CRC32_INIT, buffer, size);
uint32_t isize = htole32(size);
free(buffer);
if(fwrite(&crc, 1, sizeof(crc), fp) != sizeof(crc))
{
log_warn("compress_file(): failed to write CRC32 to %s", out);
fclose(fp);
free(buffer_compressed);
return false;
}
if(fwrite(&isize, 1, sizeof(isize), fp) != sizeof(isize))
{
log_warn("compress_file(): failed to write isize to %s", out);
fclose(fp);
free(buffer_compressed);
return false;
}
fclose(fp);
free(buffer_compressed);
return true;
}
// Credits: https://stackoverflow.com/a/55410469
int parse_line(char *line, char **key, char **value)
{

View File

@ -9,6 +9,8 @@
# Please see LICENSE file for your rights under this license.
set(sources
compression.c
compression.h
miniz.c
miniz.h
)

177
src/miniz/compression.c Normal file
View File

@ -0,0 +1,177 @@
/* Pi-hole: A black hole for Internet advertisements
* (c) 2023 Pi-hole, LLC (https://pi-hole.net)
* Network-wide ad blocking via your own hardware.
*
* FTL Engine
* Compression routines
*
* This file is copyright under the latest version of the EUPL.
* Please see LICENSE file for your rights under this license. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "miniz.h"
#include "compression.h"
#include "log.h"
// Sizes of the container framing that is stripped from / added to the stream
enum {
	ZLIB_HEADER_LEN = 2,  // CMF + FLG (RFC 1950)
	ZLIB_FOOTER_LEN = 4,  // ADLER32 (RFC 1950)
	GZIP_HEADER_LEN = 10, // ID1 ID2 CM FLG MTIME XFL OS (RFC 1952)
	GZIP_FOOTER_LEN = 8   // CRC32 + ISIZE (RFC 1952)
};

// Store a 32 bit value in little-endian byte order, independent of the
// endianness of the host
static void put_le32(unsigned char *dst, const uint32_t value)
{
	dst[0] = (unsigned char)(value & 0xFFu);
	dst[1] = (unsigned char)((value >> 8) & 0xFFu);
	dst[2] = (unsigned char)((value >> 16) & 0xFFu);
	dst[3] = (unsigned char)((value >> 24) & 0xFFu);
}

// Read the entire file into a freshly allocated buffer. Returns NULL (after
// logging a warning) on failure, otherwise the caller owns the buffer and
// *size holds the number of bytes read
static unsigned char *read_whole_file(const char *infile, mz_ulong *size)
{
	FILE *fp = fopen(infile, "rb");
	if(fp == NULL)
	{
		log_warn("Failed to open %s: %s (%d)", infile, strerror(errno), errno);
		return NULL;
	}

	// Get file size. ftell() returns -1 on failure (e.g. when the stream is
	// not seekable) which must not be used as an allocation size
	long fsize = -1;
	if(fseek(fp, 0, SEEK_END) == 0)
		fsize = ftell(fp);
	if(fsize < 0 || fseek(fp, 0, SEEK_SET) != 0)
	{
		log_warn("Failed to determine size of %s: %s (%d)", infile, strerror(errno), errno);
		fclose(fp);
		return NULL;
	}
	const mz_ulong fbytes = (mz_ulong)fsize;

	// malloc(0) may legitimately return NULL, so always request >= 1 byte
	// to be able to tell an empty file from an allocation failure
	unsigned char *buffer = malloc(fbytes > 0 ? fbytes : 1);
	if(buffer == NULL)
	{
		log_warn("Failed to allocate %lu bytes of memory", (unsigned long)fbytes);
		fclose(fp);
		return NULL;
	}

	if(fread(buffer, 1, fbytes, fp) != fbytes)
	{
		log_warn("Failed to read %lu bytes from %s", (unsigned long)fbytes, infile);
		fclose(fp);
		free(buffer);
		return NULL;
	}
	fclose(fp);

	*size = fbytes;
	return buffer;
}

// Write GZIP header, raw deflate data and GZIP footer to outfile. Returns
// false (after logging a warning) if any part could not be written
static bool write_gzip_file(const char *outfile, const unsigned char *deflate_data,
                            const size_t deflate_len, const uint32_t crc, const uint32_t isize)
{
	FILE *fp = fopen(outfile, "wb");
	if(fp == NULL)
	{
		log_warn("Failed to open %s: %s (%d)", outfile, strerror(errno), errno);
		return false;
	}

	// GZIP header (see https://tools.ietf.org/html/rfc1952#section-2.3)
	//
	//    0   1   2   3   4   5   6   7   8   9
	//  +---+---+---+---+---+---+---+---+---+---+
	//  |ID1|ID2|CM |FLG|     MTIME     |XFL|OS | (more-->)
	//  +---+---+---+---+---+---+---+---+---+---+
	//
	// 1F8B: magic number
	// 08: compression method (deflate)
	// 01: flags (FTEXT is set)
	// 00000000: timestamp placeholder, overwritten with the current time below
	// 02: extra flags (maximum compression)
	// 03: operating system (Unix)
	unsigned char gzip_header[GZIP_HEADER_LEN] = { 0x1F, 0x8B, 0x08, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x03 };
	put_le32(gzip_header + 4, (uint32_t)time(NULL));

	// GZIP footer (see https://tools.ietf.org/html/rfc1952#section-2.3)
	//
	//    0   1   2   3   4   5   6   7
	//  +---+---+---+---+---+---+---+---+
	//  |     CRC32     |     ISIZE     |
	//  +---+---+---+---+---+---+---+---+
	//
	// CRC32: CRC-32 of the uncompressed data (little endian)
	// ISIZE: size of the uncompressed data modulo 2^32 (little endian)
	unsigned char crc_le[4], isize_le[4];
	put_le32(crc_le, crc);
	put_le32(isize_le, isize);

	bool okay = true;
	if(fwrite(gzip_header, 1, sizeof(gzip_header), fp) != sizeof(gzip_header))
	{
		log_warn("Failed to write GZIP header to %s", outfile);
		okay = false;
	}
	// +=======================+
	// |...compressed blocks...| (more-->)
	// +=======================+
	else if(fwrite(deflate_data, 1, deflate_len, fp) != deflate_len)
	{
		log_warn("Failed to write %lu bytes to %s", (unsigned long)deflate_len, outfile);
		okay = false;
	}
	else if(fwrite(crc_le, 1, sizeof(crc_le), fp) != sizeof(crc_le))
	{
		log_warn("Failed to write CRC32 to %s", outfile);
		okay = false;
	}
	else if(fwrite(isize_le, 1, sizeof(isize_le), fp) != sizeof(isize_le))
	{
		log_warn("Failed to write isize to %s", outfile);
		okay = false;
	}

	// fclose() flushes buffered data, a failure here means the file is
	// incomplete and must not be reported as success
	if(fclose(fp) != 0 && okay)
	{
		log_warn("Failed to close %s: %s (%d)", outfile, strerror(errno), errno);
		okay = false;
	}

	return okay;
}

/**
 * Compress infile into the GZIP (RFC 1952) file outfile.
 *
 * The whole input is read into memory, deflated as a ZLIB stream and then
 * re-wrapped: the ZLIB header and footer are stripped and replaced by a GZIP
 * header and footer.
 *
 * @param infile  Path of the file to compress
 * @param outfile Path of the GZIP file to create (truncated if it exists)
 * @param verbose If true, log the achieved size reduction on success
 * @return true on success, false if reading, compressing or writing failed
 *         (a warning is logged for every failure)
 */
bool compress_file(const char *infile, const char *outfile, bool verbose)
{
	// Read entire file into memory
	mz_ulong size = 0;
	unsigned char *buffer = read_whole_file(infile, &size);
	if(buffer == NULL)
		return false;

	// Allocate memory for compressed file
	// (compressBound() returns the maximum size of the compressed data)
	mz_ulong size_compressed = compressBound(size);
	unsigned char *buffer_compressed = malloc(size_compressed);
	if(buffer_compressed == NULL)
	{
		log_warn("Failed to allocate %lu bytes of memory", (unsigned long)size_compressed);
		free(buffer);
		return false;
	}

	// Compress file (ZLIB stream format - not GZIP! - see https://tools.ietf.org/html/rfc1950)
	const int ret = compress2(buffer_compressed, &size_compressed, buffer, size, Z_BEST_COMPRESSION);

	// The checksum is computed over the uncompressed data, afterwards the
	// input buffer is not needed any longer
	const uint32_t crc = (uint32_t)mz_crc32(MZ_CRC32_INIT, buffer, size);
	free(buffer);

	if(ret != Z_OK)
	{
		log_warn("Failed to compress %s: %s (%d)", infile, zError(ret), ret);
		free(buffer_compressed);
		return false;
	}

	// Guard against unsigned underflow when stripping the ZLIB framing
	if(size_compressed < ZLIB_HEADER_LEN + ZLIB_FOOTER_LEN)
	{
		log_warn("Failed to compress %s: compressed stream too short (%lu bytes)",
		         infile, (unsigned long)size_compressed);
		free(buffer_compressed);
		return false;
	}

	// Strip ZLIB header (first two bytes) and footer (last four bytes)
	const size_t deflate_len = size_compressed - (ZLIB_HEADER_LEN + ZLIB_FOOTER_LEN);
	const bool written = write_gzip_file(outfile, buffer_compressed + ZLIB_HEADER_LEN, deflate_len,
	                                     crc, (uint32_t)(size & 0xFFFFFFFFu));
	free(buffer_compressed);
	if(!written)
		return false;

	if(verbose)
	{
		// Print compression ratio
		// Compressed size = size of compressed data
		//                 + 10 bytes for GZIP header
		//                 + 8 bytes for GZIP footer
		const size_t csize = deflate_len + GZIP_HEADER_LEN + GZIP_FOOTER_LEN;
		double raw_size = 0.0, comp_size = 0.0;
		char raw_prefix[2] = { 0 }, comp_prefix[2] = { 0 };
		format_memory_size(raw_prefix, size, &raw_size);
		format_memory_size(comp_prefix, csize, &comp_size);
		// An empty input file cannot be reduced any further, avoid
		// dividing by zero in this case
		const double reduction = size > 0 ? 100.0 - 100.0*csize / size : 0.0;
		log_info("Compressed %s (%.1f%sB) to %s (%.1f%sB), %.1f%% size reduction",
		         infile, raw_size, raw_prefix, outfile, comp_size, comp_prefix,
		         reduction);
	}

	return true;
}

17
src/miniz/compression.h Normal file
View File

@ -0,0 +1,17 @@
/* Pi-hole: A black hole for Internet advertisements
* (c) 2023 Pi-hole, LLC (https://pi-hole.net)
* Network-wide ad blocking via your own hardware.
*
* FTL Engine
* Compression routines
*
* This file is copyright under the latest version of the EUPL.
* Please see LICENSE file for your rights under this license. */
#ifndef COMPRESSION_H
#define COMPRESSION_H
#include <stdbool.h>
/**
 * Compress a file into a gzip-compatible file.
 *
 * @param in      Path of the file to read and compress
 * @param out     Path of the compressed file to create
 * @param verbose If true, log a summary of the achieved size reduction
 *                after successful compression
 * @return true on success, false if reading, compressing or writing failed
 */
bool compress_file(const char *in, const char *out, bool verbose);
#endif // COMPRESSION_H