/* libSoX effect: Spectrogram (c) 2008-9 robs@users.sourceforge.net * * This library is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or (at * your option) any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser * General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this library; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifdef NDEBUG /* Enable assert always. */ #undef NDEBUG /* Must undef above assert.h or other that might include it. */ #endif #include "sox_i.h" #include "fft4g.h" #include #include #ifdef HAVE_LIBPNG_PNG_H #include #else #include #endif #include /* For SET_BINARY_MODE: */ #include #ifdef HAVE_IO_H #include #endif #define is_p2(x) !(x & (x - 1)) #define MAX_X_SIZE 200000 #if SSIZE_MAX < UINT32_MAX #define MAX_Y_SIZE 16384 /* avoid multiplication overflow on 32-bit systems */ #else #define MAX_Y_SIZE 200000 #endif typedef enum { Window_Hann, Window_Hamming, Window_Bartlett, Window_Rectangular, Window_Kaiser, Window_Dolph } win_type_t; static const lsx_enum_item window_options[] = { LSX_ENUM_ITEM(Window_,Hann) LSX_ENUM_ITEM(Window_,Hamming) LSX_ENUM_ITEM(Window_,Bartlett) LSX_ENUM_ITEM(Window_,Rectangular) LSX_ENUM_ITEM(Window_,Kaiser) LSX_ENUM_ITEM(Window_,Dolph) {0, 0} }; typedef struct { /* Parameters */ double pixels_per_sec; double window_adjust; int x_size0; int y_size; int Y_size; int dB_range; int gain; int spectrum_points; int perm; sox_bool monochrome; sox_bool light_background; sox_bool high_colour; sox_bool slack_overlap; sox_bool no_axes; sox_bool normalize; sox_bool raw; sox_bool alt_palette; sox_bool truncate; win_type_t win_type; const char *out_name; const char *title; const char *comment; const char *duration_str; const char *start_time_str; sox_bool using_stdout; /* output image to stdout */ /* Shared work area */ double *shared; double **shared_ptr; /* Per-channel work area */ int WORK; /* Start of work area is marked by this dummy variable. */ uint64_t skip; int dft_size; int step_size; int block_steps; int block_num; int rows; int cols; int read; int x_size; int end; int end_min; int last_end; sox_bool truncated; double *buf; /* [dft_size] */ double *dft_buf; /* [dft_size] */ double *window; /* [dft_size + 1] */ double block_norm; double max; double *magnitudes; /* [dft_size / 2 + 1] */ float *dBfs; } priv_t; #define secs(cols) \ ((double)(cols) * p->step_size * p->block_steps / effp->in_signal.rate) static const unsigned char alt_palette[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x01, 0x05, 0x00, 0x01, 0x08, 0x00, 0x01, 0x0a, 0x00, 0x01, 0x0b, 0x00, 0x01, 0x0e, 0x01, 0x02, 0x10, 0x01, 0x02, 0x12, 0x01, 0x02, 0x15, 0x01, 0x02, 0x16, 0x01, 0x02, 0x18, 0x01, 0x03, 0x1b, 0x01, 0x03, 0x1d, 0x01, 0x03, 0x1f, 0x01, 0x03, 0x20, 0x01, 0x03, 0x22, 0x01, 0x03, 0x24, 0x01, 0x03, 0x25, 0x01, 0x03, 0x27, 0x01, 0x03, 0x28, 0x01, 0x03, 0x2a, 0x01, 0x03, 0x2c, 0x01, 0x03, 0x2e, 0x01, 0x03, 0x2f, 0x01, 0x03, 0x30, 0x01, 0x03, 0x32, 0x01, 0x03, 0x34, 0x02, 0x03, 0x36, 0x04, 0x03, 0x38, 0x05, 0x03, 0x39, 0x07, 0x03, 0x3b, 0x09, 0x03, 0x3d, 0x0b, 0x03, 0x3f, 0x0e, 0x03, 0x41, 0x0f, 0x02, 0x42, 0x11, 0x02, 0x44, 0x13, 0x02, 0x46, 0x15, 0x02, 0x48, 0x17, 0x02, 0x4a, 0x18, 0x02, 0x4b, 0x1a, 0x02, 0x4d, 0x1d, 0x02, 0x4f, 0x20, 0x02, 0x51, 0x24, 0x02, 0x53, 0x28, 0x02, 0x55, 0x2b, 0x02, 0x57, 0x30, 0x02, 0x5a, 0x33, 0x02, 0x5c, 0x37, 0x02, 0x5f, 0x3b, 0x02, 0x61, 0x3e, 0x02, 0x63, 0x42, 0x02, 0x65, 0x45, 0x02, 0x68, 0x49, 0x02, 0x6a, 0x4d, 0x02, 0x6c, 0x51, 0x02, 0x6e, 0x55, 0x02, 0x70, 0x5a, 0x02, 0x72, 0x5f, 0x02, 0x74, 0x63, 0x02, 0x75, 0x68, 0x02, 0x76, 0x6c, 0x02, 0x78, 0x70, 0x03, 0x7a, 0x75, 0x03, 0x7c, 0x7a, 0x03, 0x7d, 0x7e, 0x03, 0x7e, 0x83, 0x03, 0x80, 0x87, 0x03, 0x82, 0x8c, 0x03, 0x84, 0x90, 0x03, 0x85, 0x93, 0x03, 0x83, 0x96, 0x03, 0x80, 0x98, 0x03, 0x7e, 0x9b, 0x03, 0x7c, 0x9e, 0x03, 0x7a, 0xa0, 0x03, 0x78, 0xa3, 0x03, 0x75, 0xa6, 0x03, 0x73, 0xa9, 0x03, 0x71, 0xab, 0x03, 0x6f, 0xae, 0x03, 0x6d, 0xb1, 0x03, 0x6a, 0xb3, 0x03, 0x68, 0xb6, 0x03, 0x66, 0xba, 0x03, 0x62, 0xbc, 0x03, 0x5e, 0xc0, 0x03, 0x5a, 0xc3, 0x03, 0x56, 0xc7, 0x03, 0x52, 0xca, 0x03, 0x4e, 0xcd, 0x03, 0x4a, 0xd1, 0x03, 0x46, 0xd4, 0x03, 0x43, 0xd7, 0x03, 0x3e, 0xdb, 0x03, 0x3a, 0xde, 0x03, 0x36, 0xe2, 0x03, 0x32, 0xe4, 0x03, 0x2f, 0xe6, 0x07, 0x2d, 0xe8, 0x0d, 0x2c, 0xea, 0x11, 0x2b, 0xec, 0x17, 0x2a, 0xed, 0x1b, 0x29, 0xee, 0x20, 0x28, 0xf0, 0x26, 0x27, 0xf2, 0x2a, 0x26, 0xf4, 0x2f, 0x24, 0xf5, 0x34, 0x23, 0xf6, 0x39, 0x23, 0xf8, 0x3e, 0x21, 0xfa, 0x43, 0x20, 0xfc, 0x49, 0x20, 0xfc, 0x4f, 0x22, 0xfc, 0x56, 0x26, 0xfc, 0x5d, 0x2a, 0xfc, 0x64, 0x2c, 0xfc, 0x6b, 0x30, 0xfc, 0x72, 0x33, 0xfc, 0x7a, 0x37, 0xfd, 0x81, 0x3b, 0xfd, 0x88, 0x3e, 0xfd, 0x8f, 0x42, 0xfd, 0x96, 0x45, 0xfd, 0x9e, 0x49, 0xfd, 0xa5, 0x4d, 0xfd, 0xac, 0x50, 0xfd, 0xb1, 0x54, 0xfd, 0xb7, 0x58, 0xfd, 0xbc, 0x5c, 0xfd, 0xc1, 0x61, 0xfd, 0xc6, 0x65, 0xfd, 0xcb, 0x69, 0xfd, 0xd0, 0x6d, 0xfe, 0xd5, 0x71, 0xfe, 0xda, 0x76, 0xfe, 0xdf, 0x7a, 0xfe, 0xe4, 0x7e, 0xfe, 0xe9, 0x82, 0xfe, 0xee, 0x86, 0xfe, 0xf3, 0x8b, 0xfd, 0xf5, 0x8f, 0xfc, 0xf6, 0x93, 0xfb, 0xf7, 0x98, 0xfa, 0xf7, 0x9c, 0xf9, 0xf8, 0xa1, 0xf8, 0xf9, 0xa5, 0xf7, 0xf9, 0xaa, 0xf6, 0xfa, 0xae, 0xf5, 0xfa, 0xb3, 0xf4, 0xfb, 0xb7, 0xf3, 0xfc, 0xbc, 0xf1, 0xfd, 0xc0, 0xf0, 0xfd, 0xc5, 0xf0, 0xfe, 0xc9, 0xef, 0xfe, 0xcc, 0xef, 0xfe, 0xcf, 0xf0, 0xfe, 0xd1, 0xf0, 0xfe, 0xd4, 0xf0, 0xfe, 0xd6, 0xf0, 0xfe, 0xd8, 0xf0, 0xfe, 0xda, 0xf1, 0xff, 0xdd, 0xf1, 0xff, 0xdf, 0xf1, 0xff, 0xe1, 0xf1, 0xff, 0xe4, 0xf1, 0xff, 0xe6, 0xf2, 0xff, 0xe8, }; #define alt_palette_len (array_length(alt_palette) / 3) static int getopts(sox_effect_t *effp, int argc, char **argv) { priv_t *p = effp->priv; uint64_t dummy; const char *next; int c; lsx_getopt_t optstate; lsx_getopt_init(argc, argv, "+S:d:x:X:y:Y:z:Z:q:p:W:w:st:c:AarmnlhTo:", NULL, lsx_getopt_flag_none, 1, &optstate); p->dB_range = 120; p->spectrum_points = 249; p->perm = 1; p->out_name = "spectrogram.png"; p->comment = "Created by SoX"; while ((c = lsx_getopt(&optstate)) != -1) { switch (c) { GETOPT_NUMERIC(optstate, 'x', x_size0, 100, MAX_X_SIZE) GETOPT_NUMERIC(optstate, 'X', pixels_per_sec, 1, 5000) GETOPT_NUMERIC(optstate, 'y', y_size, 64, MAX_Y_SIZE) GETOPT_NUMERIC(optstate, 'Y', Y_size, 130, MAX_Y_SIZE) GETOPT_NUMERIC(optstate, 'z', dB_range, 20, 180) GETOPT_NUMERIC(optstate, 'Z', gain, -100, 100) GETOPT_NUMERIC(optstate, 'q', spectrum_points, 0, p->spectrum_points) GETOPT_NUMERIC(optstate, 'p', perm, 1, 6) GETOPT_NUMERIC(optstate, 'W', window_adjust, -10, 10) case 'w': p->win_type = lsx_enum_option(c, optstate.arg, window_options); break; case 's': p->slack_overlap = sox_true; break; case 'A': p->alt_palette = sox_true; break; case 'a': p->no_axes = sox_true; break; case 'r': p->raw = sox_true; break; case 'm': p->monochrome = sox_true; break; case 'n': p->normalize = sox_true; break; case 'l': p->light_background = sox_true; break; case 'h': p->high_colour = sox_true; break; case 'T': p->truncate = sox_true; break; case 't': p->title = optstate.arg; break; case 'c': p->comment = optstate.arg; break; case 'o': p->out_name = optstate.arg; break; case 'S': next = lsx_parseposition(0, optstate.arg, NULL, 0, 0, '='); if (next && !*next) { p->start_time_str = lsx_strdup(optstate.arg); break; } return lsx_usage(effp); case 'd': next = lsx_parsesamples(1e5, optstate.arg, &dummy, 't'); if (next && !*next) { p->duration_str = lsx_strdup(optstate.arg); break; } return lsx_usage(effp); default: lsx_fail("invalid option `-%c'", optstate.opt); return lsx_usage(effp); } } if (!!p->x_size0 + !!p->pixels_per_sec + !!p->duration_str > 2) { lsx_fail("only two of -x, -X, -d may be given"); return SOX_EOF; } if (p->y_size && p->Y_size) { lsx_fail("only one of -y, -Y may be given"); return SOX_EOF; } p->gain = -p->gain; --p->perm; p->spectrum_points += 2; if (p->alt_palette) p->spectrum_points = min(p->spectrum_points, alt_palette_len); p->shared_ptr = &p->shared; if (!strcmp(p->out_name, "-")) { if (effp->global_info->global_info->stdout_in_use_by) { lsx_fail("stdout already in use by `%s'", effp->global_info->global_info->stdout_in_use_by); return SOX_EOF; } effp->global_info->global_info->stdout_in_use_by = effp->handler.name; p->using_stdout = sox_true; } return optstate.ind != argc || p->win_type == INT_MAX ? lsx_usage(effp) : SOX_SUCCESS; } static double make_window(priv_t *p, int end) { double sum = 0; double *w = end < 0 ? p->window : p->window + end; double beta; int n = 1 + p->dft_size - abs(end); int i; if (end) memset(p->window, 0, sizeof(*p->window) * (p->dft_size + 1)); for (i = 0; i < n; ++i) w[i] = 1; switch (p->win_type) { case Window_Hann: lsx_apply_hann(w, n); break; case Window_Hamming: lsx_apply_hamming(w, n); break; case Window_Bartlett: lsx_apply_bartlett(w, n); break; case Window_Rectangular: break; case Window_Kaiser: beta = lsx_kaiser_beta((p->dB_range + p->gain) * (1.1 + p->window_adjust / 50), .1); lsx_apply_kaiser(w, n, beta); break; default: lsx_apply_dolph(w, n, (p->dB_range + p->gain) * (1.005 + p->window_adjust / 50) + 6); } for (i = 0; i < p->dft_size; ++i) sum += p->window[i]; /* empirical small window adjustment */ for (--n, i = 0; i < p->dft_size; ++i) p->window[i] *= 2 / sum * sqr((double)n / p->dft_size); return sum; } static double *rdft_init(size_t n) { double *q = lsx_malloc(2 * (n / 2 + 1) * n * sizeof(*q)); double *p = q; int i, j; for (j = 0; j <= n / 2; ++j) { for (i = 0; i < n; ++i) { *p++ = cos(2 * M_PI * j * i / n); *p++ = sin(2 * M_PI * j * i / n); } } return q; } #define _ re += in[i] * *q++, im += in[i++] * *q++, static void rdft_p(const double *q, const double *in, double *out, int n) { int i, j; for (j = 0; j <= n / 2; ++j) { double re = 0, im = 0; for (i = 0; i < (n & ~7);) _ _ _ _ _ _ _ _ (void)0; while (i < n) _ (void)0; *out++ += re * re + im * im; } } static int start(sox_effect_t *effp) { priv_t *p = effp->priv; double actual; double duration = 0.0; double start_time = 0.0; double pixels_per_sec = p->pixels_per_sec; uint64_t d; memset(&p->WORK, 0, sizeof(*p) - field_offset(priv_t, WORK)); if (p->duration_str) { lsx_parsesamples(effp->in_signal.rate, p->duration_str, &d, 't'); duration = d / effp->in_signal.rate; } if (p->start_time_str) { uint64_t in_length = effp->in_signal.length != SOX_UNKNOWN_LEN ? effp->in_signal.length / effp->in_signal.channels : SOX_UNKNOWN_LEN; if (!lsx_parseposition(effp->in_signal.rate, p->start_time_str, &d, 0, in_length, '=') || d == SOX_UNKNOWN_LEN) { lsx_fail("-S option: audio length is unknown"); return SOX_EOF; } start_time = d / effp->in_signal.rate; p->skip = d; } p->x_size = p->x_size0; while (sox_true) { if (!pixels_per_sec && p->x_size && duration) pixels_per_sec = min(5000, p->x_size / duration); else if (!p->x_size && pixels_per_sec && duration) p->x_size = min(MAX_X_SIZE, (int)(pixels_per_sec * duration + .5)); if (!duration && effp->in_signal.length != SOX_UNKNOWN_LEN) { duration = effp->in_signal.length / (effp->in_signal.rate * effp->in_signal.channels); duration -= start_time; if (duration <= 0) duration = 1; continue; } else if (!p->x_size) { p->x_size = 800; continue; } else if (!pixels_per_sec) { pixels_per_sec = 100; continue; } break; } if (p->y_size) { p->dft_size = 2 * (p->y_size - 1); if (!is_p2(p->dft_size) && !effp->flow) p->shared = rdft_init(p->dft_size); } else { int y = max(32, (p->Y_size? p->Y_size : 550) / effp->in_signal.channels - 2); for (p->dft_size = 128; p->dft_size <= y; p->dft_size <<= 1); } /* Now that dft_size is set, allocate variable-sized elements of priv_t */ p->buf = lsx_calloc(p->dft_size, sizeof(*p->buf)); p->dft_buf = lsx_calloc(p->dft_size, sizeof(*p->dft_buf)); p->window = lsx_calloc(p->dft_size + 1, sizeof(*p->window)); p->magnitudes = lsx_calloc(p->dft_size / 2 + 1, sizeof(*p->magnitudes)); if (is_p2(p->dft_size) && !effp->flow) lsx_safe_rdft(p->dft_size, 1, p->dft_buf); lsx_debug("duration=%g x_size=%i pixels_per_sec=%g dft_size=%i", duration, p->x_size, pixels_per_sec, p->dft_size); p->end = p->dft_size; p->rows = (p->dft_size >> 1) + 1; actual = make_window(p, p->last_end = 0); lsx_debug("window_density=%g", actual / p->dft_size); p->step_size = (p->slack_overlap ? sqrt(actual * p->dft_size) : actual) + 0.5; p->block_steps = effp->in_signal.rate / pixels_per_sec; p->step_size = p->block_steps / ceil((double)p->block_steps / p->step_size) + 0.5; p->block_steps = floor((double)p->block_steps / p->step_size + 0.5); p->block_norm = 1.0 / p->block_steps; actual = effp->in_signal.rate / p->step_size / p->block_steps; if (!effp->flow && actual != pixels_per_sec) lsx_report("actual pixels/s = %g", actual); lsx_debug("step_size=%i block_steps=%i", p->step_size, p->block_steps); p->max = -p->dB_range; p->read = (p->step_size - p->dft_size) / 2; return SOX_SUCCESS; } static int do_column(sox_effect_t *effp) { priv_t *p = effp->priv; int i; if (p->cols == p->x_size) { p->truncated = sox_true; if (!effp->flow) lsx_report("PNG truncated at %g seconds", secs(p->cols)); return p->truncate ? SOX_EOF : SOX_SUCCESS; } ++p->cols; p->dBfs = lsx_realloc(p->dBfs, p->cols * p->rows * sizeof(*p->dBfs)); /* FIXME: allocate in larger steps (for several columns) */ for (i = 0; i < p->rows; ++i) { double dBfs = 10 * log10(p->magnitudes[i] * p->block_norm); p->dBfs[(p->cols - 1) * p->rows + i] = dBfs + p->gain; p->max = max(dBfs, p->max); } memset(p->magnitudes, 0, p->rows * sizeof(*p->magnitudes)); p->block_num = 0; return SOX_SUCCESS; } static int flow(sox_effect_t *effp, const sox_sample_t *ibuf, sox_sample_t *obuf, size_t *isamp, size_t *osamp) { priv_t *p = effp->priv; size_t len = *isamp = *osamp = min(*isamp, *osamp); int i; memcpy(obuf, ibuf, len * sizeof(*obuf)); /* Pass on audio unaffected */ if (p->skip) { if (p->skip >= len) { p->skip -= len; return SOX_SUCCESS; } ibuf += p->skip; len -= p->skip; p->skip = 0; } while (!p->truncated) { if (p->read == p->step_size) { memmove(p->buf, p->buf + p->step_size, (p->dft_size - p->step_size) * sizeof(*p->buf)); p->read = 0; } for (; len && p->read < p->step_size; --len, ++p->read, --p->end) p->buf[p->dft_size - p->step_size + p->read] = SOX_SAMPLE_TO_FLOAT_64BIT(*ibuf++,); if (p->read != p->step_size) break; if ((p->end = max(p->end, p->end_min)) != p->last_end) make_window(p, p->last_end = p->end); for (i = 0; i < p->dft_size; ++i) p->dft_buf[i] = p->buf[i] * p->window[i]; if (is_p2(p->dft_size)) { lsx_safe_rdft(p->dft_size, 1, p->dft_buf); p->magnitudes[0] += sqr(p->dft_buf[0]); for (i = 1; i < p->dft_size >> 1; ++i) p->magnitudes[i] += sqr(p->dft_buf[2*i]) + sqr(p->dft_buf[2*i+1]); p->magnitudes[p->dft_size >> 1] += sqr(p->dft_buf[1]); } else rdft_p(*p->shared_ptr, p->dft_buf, p->magnitudes, p->dft_size); if (++p->block_num == p->block_steps && do_column(effp) == SOX_EOF) return SOX_EOF; } return SOX_SUCCESS; } static int drain(sox_effect_t *effp, sox_sample_t *obuf_, size_t *osamp) { priv_t *p = effp->priv; if (!p->truncated) { sox_sample_t * ibuf = lsx_calloc(p->dft_size, sizeof(*ibuf)); sox_sample_t * obuf = lsx_calloc(p->dft_size, sizeof(*obuf)); size_t isamp = (p->dft_size - p->step_size) / 2; int left_over = (isamp + p->read) % p->step_size; if (left_over >= p->step_size >> 1) isamp += p->step_size - left_over; lsx_debug("cols=%i left=%i end=%i", p->cols, p->read, p->end); p->end = 0, p->end_min = -p->dft_size; if (flow(effp, ibuf, obuf, &isamp, &isamp) == SOX_SUCCESS && p->block_num) { p->block_norm *= (double)p->block_steps / p->block_num; do_column(effp); } lsx_debug("flushed cols=%i left=%i end=%i", p->cols, p->read, p->end); free(obuf); free(ibuf); } *osamp = 0; return SOX_SUCCESS; } enum { Background, Text, Labels, Grid, fixed_palette }; static unsigned colour(const priv_t *p, double x) { unsigned c = x < -p->dB_range ? 0 : x >= 0 ? p->spectrum_points - 1 : 1 + (1 + x / p->dB_range) * (p->spectrum_points - 2); return fixed_palette + c; } static void make_palette(const priv_t *p, png_color *palette) { static const unsigned char black[] = { 0x00, 0x00, 0x00 }; static const unsigned char dgrey[] = { 0x3f, 0x3f, 0x3f }; static const unsigned char mgrey[] = { 0x7f, 0x7f, 0x7f }; static const unsigned char lgrey[] = { 0xbf, 0xbf, 0xbf }; static const unsigned char white[] = { 0xff, 0xff, 0xff }; static const unsigned char lbgnd[] = { 0xdd, 0xd8, 0xd0 }; static const unsigned char mbgnd[] = { 0xdf, 0xdf, 0xdf }; int i; if (p->light_background) { memcpy(palette++, p->monochrome ? mbgnd : lbgnd, 3); memcpy(palette++, black, 3); memcpy(palette++, dgrey, 3); memcpy(palette++, dgrey, 3); } else { memcpy(palette++, black, 3); memcpy(palette++, white, 3); memcpy(palette++, lgrey, 3); memcpy(palette++, mgrey, 3); } for (i = 0; i < p->spectrum_points; ++i) { double c[3]; double x = (double)i / (p->spectrum_points - 1); int at = p->light_background ? p->spectrum_points - 1 - i : i; if (p->monochrome) { c[2] = c[1] = c[0] = x; if (p->high_colour) { c[(1 + p->perm) % 3] = x < .4? 0 : 5 / 3. * (x - .4); if (p->perm < 3) c[(2 + p->perm) % 3] = x < .4? 0 : 5 / 3. * (x - .4); } palette[at].red = .5 + 255 * c[0]; palette[at].green= .5 + 255 * c[1]; palette[at].blue = .5 + 255 * c[2]; continue; } if (p->high_colour) { static const int states[3][7] = { { 4, 5, 0, 0, 2, 1, 1 }, { 0, 0, 2, 1, 1, 3, 2 }, { 4, 1, 1, 3, 0, 0, 2 }, }; int j, phase_num = min(7 * x, 6); for (j = 0; j < 3; ++j) { switch (states[j][phase_num]) { case 0: c[j] = 0; break; case 1: c[j] = 1; break; case 2: c[j] = sin((7 * x - phase_num) * M_PI / 2); break; case 3: c[j] = cos((7 * x - phase_num) * M_PI / 2); break; case 4: c[j] = 7 * x - phase_num; break; case 5: c[j] = 1 - (7 * x - phase_num); break; } } } else if (p->alt_palette) { int n = (double)i / (p->spectrum_points - 1) * (alt_palette_len - 1) + .5; c[0] = alt_palette[3 * n + 0] / 255.; c[1] = alt_palette[3 * n + 1] / 255.; c[2] = alt_palette[3 * n + 2] / 255.; } else { if (x < .13) c[0] = 0; else if (x < .73) c[0] = 1 * sin((x - .13) / .60 * M_PI / 2); else c[0] = 1; if (x < .60) c[1] = 0; else if (x < .91) c[1] = 1 * sin((x - .60) / .31 * M_PI / 2); else c[1] = 1; if (x < .60) c[2] = .5 * sin((x - .00) / .60 * M_PI); else if (x < .78) c[2] = 0; else c[2] = (x - .78) / .22; } palette[at].red = .5 + 255 * c[p->perm % 3]; palette[at].green= .5 + 255 * c[(1 + p->perm + (p->perm % 2)) % 3]; palette[at].blue = .5 + 255 * c[(2 + p->perm - (p->perm % 2)) % 3]; } } static const Bytef fixed[] = { 0x78, 0xda, 0x65, 0x54, 0xa1, 0xb6, 0xa5, 0x30, 0x0c, 0x44, 0x56, 0x56, 0x3e, 0x59, 0xf9, 0x24, 0x72, 0x65, 0x25, 0x32, 0x9f, 0x80, 0x7c, 0x32, 0x12, 0x59, 0x59, 0x89, 0x44, 0x22, 0x2b, 0xdf, 0x27, 0x3c, 0x79, 0xe5, 0xca, 0xfd, 0x0c, 0x64, 0xe5, 0x66, 0x92, 0x94, 0xcb, 0x9e, 0x9d, 0x7b, 0xb8, 0xe4, 0x4c, 0xa7, 0x61, 0x9a, 0x04, 0xa6, 0xe9, 0x81, 0x64, 0x98, 0x92, 0xc4, 0x44, 0xf4, 0x5e, 0x20, 0xea, 0xf2, 0x53, 0x22, 0x6d, 0xe7, 0xc9, 0x9f, 0x9f, 0x17, 0x34, 0x4b, 0xa3, 0x98, 0x32, 0xb5, 0x1d, 0x0b, 0xf9, 0x3c, 0xf3, 0x79, 0xec, 0x5f, 0x96, 0x67, 0xec, 0x8c, 0x29, 0x65, 0x20, 0xa5, 0x38, 0xe1, 0x0f, 0x10, 0x4a, 0x34, 0x8d, 0x5b, 0x7a, 0x3c, 0xb9, 0xbf, 0xf7, 0x00, 0x33, 0x34, 0x40, 0x7f, 0xd8, 0x63, 0x97, 0x84, 0x20, 0x49, 0x72, 0x2e, 0x05, 0x24, 0x55, 0x80, 0xb0, 0x94, 0xd6, 0x53, 0x0f, 0x80, 0x3d, 0x5c, 0x6b, 0x10, 0x51, 0x41, 0xdc, 0x25, 0xe2, 0x10, 0x2a, 0xc3, 0x50, 0x9c, 0x89, 0xf6, 0x1e, 0x23, 0xf8, 0x52, 0xbe, 0x5f, 0xce, 0x73, 0x2d, 0xe5, 0x92, 0x44, 0x6c, 0x7a, 0xf3, 0x6d, 0x79, 0x2a, 0x3b, 0x8f, 0xfb, 0xe6, 0x7a, 0xb7, 0xe3, 0x9e, 0xf4, 0xa6, 0x9e, 0xf5, 0xa1, 0x39, 0xc5, 0x70, 0xdb, 0xb7, 0x13, 0x28, 0x87, 0xb5, 0xdb, 0x9b, 0xd5, 0x59, 0xe2, 0xa3, 0xb5, 0xef, 0xb2, 0x8d, 0xb3, 0x74, 0xb9, 0x24, 0xbe, 0x96, 0x65, 0x61, 0xb9, 0x2e, 0xf7, 0x26, 0xd0, 0xe7, 0x82, 0x5f, 0x9c, 0x17, 0xff, 0xe5, 0x92, 0xab, 0x3f, 0xe2, 0x32, 0xf4, 0x87, 0x79, 0xae, 0x9e, 0x12, 0x39, 0xd9, 0x1b, 0x0c, 0xfe, 0x97, 0xb6, 0x22, 0xee, 0xab, 0x6a, 0xf6, 0xf3, 0xe7, 0xdc, 0x55, 0x53, 0x1c, 0x5d, 0xf9, 0x3f, 0xad, 0xf9, 0xde, 0xfa, 0x7a, 0xb5, 0x76, 0x1c, 0x96, 0xa7, 0x1a, 0xd4, 0x8f, 0xdc, 0xdf, 0xcf, 0x55, 0x34, 0x0e, 0xce, 0x7b, 0x4e, 0xf8, 0x19, 0xf5, 0xef, 0x69, 0x4c, 0x99, 0x79, 0x1b, 0x79, 0xb4, 0x89, 0x44, 0x37, 0xdf, 0x04, 0xa4, 0xb1, 0x90, 0x44, 0xe6, 0x01, 0xb1, 0xef, 0xed, 0x3e, 0x03, 0xe2, 0x93, 0xf3, 0x00, 0xc3, 0xbf, 0x0c, 0x5b, 0x8c, 0x41, 0x2c, 0x70, 0x1c, 0x60, 0xad, 0xed, 0xf4, 0x1f, 0xfa, 0x94, 0xe2, 0xbf, 0x0c, 0x87, 0xad, 0x1e, 0x5f, 0x56, 0x07, 0x1c, 0xa1, 0x5e, 0xce, 0x57, 0xaf, 0x7f, 0x08, 0xa2, 0xc0, 0x1c, 0x0c, 0xbe, 0x1b, 0x3f, 0x2f, 0x39, 0x5f, 0xd9, 0x66, 0xe6, 0x1e, 0x15, 0x59, 0x29, 0x98, 0x31, 0xaf, 0xa1, 0x34, 0x7c, 0x1d, 0xf5, 0x9f, 0xe2, 0x34, 0x6b, 0x03, 0xa4, 0x03, 0xa2, 0xb1, 0x06, 0x08, 0xbd, 0x3e, 0x7a, 0x24, 0xf8, 0x8d, 0x3a, 0xb8, 0xf3, 0x77, 0x1e, 0x2f, 0xb5, 0x6b, 0x46, 0x0b, 0x10, 0x6f, 0x36, 0xa2, 0xc1, 0xf4, 0xde, 0x17, 0xd5, 0xaf, 0xd1, 0xf4, 0x66, 0x73, 0x99, 0x8d, 0x47, 0x1a, 0x3d, 0xaf, 0xc5, 0x44, 0x69, 0xc4, 0x5e, 0x7f, 0xc4, 0x52, 0xf4, 0x51, 0x3d, 0x95, 0xfb, 0x1b, 0xd0, 0xfd, 0xfd, 0xfa, 0x80, 0xdf, 0x1f, 0xfc, 0x7d, 0xdc, 0xdf, 0x10, 0xf4, 0xc8, 0x28, 0x5d, 0xc4, 0xb7, 0x62, 0x7f, 0xd6, 0x59, 0x72, 0x6a, 0xca, 0xbf, 0xfb, 0x9b, 0x1f, 0xe0, }; static unsigned char *font; #define font_x 5 #define font_y 12 #define font_X (font_x + 1) #define pixel(x,y) pixels[(y) * cols + (x)] #define print_at(x,y,c,t) print_at_(pixels,cols,x,y,c,t,0) #define print_up(x,y,c,t) print_at_(pixels,cols,x,y,c,t,1) static void print_at_(png_byte *pixels, int cols, int x, int y, int c, const char *text, int orientation) { for (; *text; ++text) { int pos = ((*text < ' ' || *text > '~'? '~' + 1 : *text) - ' ') * font_y; int i, j; for (i = 0; i < font_y; ++i) { unsigned line = font[pos++]; for (j = 0; j < font_x; ++j, line <<= 1) { if (line & 0x80) { switch (orientation) { case 0: pixel(x + j, y - i) = c; break; case 1: pixel(x + i, y + j) = c; break; } } } } switch (orientation) { case 0: x += font_X; break; case 1: y += font_X; break; } } } static int axis(double to, int max_steps, double *limit, char **prefix) { double scale = 1, step = max(1, 10 * to); int i, prefix_num = 0; if (max_steps) { double try; double log_10 = HUGE_VAL; double min_step = (to *= 10) / max_steps; for (i = 5; i; i >>= 1) { if ((try = ceil(log10(min_step * i))) <= log_10) { step = pow(10., log_10 = try) / i; log_10 -= i > 1; } } prefix_num = floor(log_10 / 3); scale = pow(10., -3. * prefix_num); } *prefix = "pnum-kMGTPE" + prefix_num + (prefix_num? 4 : 11); *limit = to * scale; return step * scale + .5; } #define below 48 #define left 58 #define between 37 #define spectrum_width 14 #define right 35 static int stop(sox_effect_t *effp) /* only called, by end(), on flow 0 */ { priv_t *p = effp->priv; FILE *file; uLong font_len = 96 * font_y; int chans = effp->in_signal.channels; int c_rows = p->rows * chans + chans - 1; int rows = p->raw? c_rows : below + c_rows + 30 + 20 * !!p->title; int cols = p->raw? p->cols : left + p->cols + between + spectrum_width + right; png_byte *pixels = lsx_malloc(cols * rows * sizeof(*pixels)); png_bytepp png_rows = lsx_malloc(rows * sizeof(*png_rows)); png_structp png = png_create_write_struct(PNG_LIBPNG_VER_STRING, 0, 0,0); png_infop png_info = png_create_info_struct(png); png_color palette[256]; int i, j, k; int base; int step; int tick_len = 3 - p->no_axes; char text[200]; char *prefix; double limit; float autogain = 0.0; /* Is changed if the -n flag was supplied */ free(p->shared); if (p->using_stdout) { SET_BINARY_MODE(stdout); file = stdout; } else { file = fopen(p->out_name, "wb"); if (!file) { lsx_fail("failed to create `%s': %s", p->out_name, strerror(errno)); goto error; } } lsx_debug("signal-max=%g", p->max); font = lsx_malloc(font_len); assert(uncompress(font, &font_len, fixed, sizeof(fixed)) == Z_OK); make_palette(p, palette); memset(pixels, Background, cols * rows * sizeof(*pixels)); png_init_io(png, file); png_set_PLTE(png, png_info, palette, fixed_palette + p->spectrum_points); png_set_IHDR(png, png_info, cols, rows, 8, PNG_COLOR_TYPE_PALETTE, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); for (j = 0; j < rows; ++j) /* Put (0,0) at bottom-left of PNG */ png_rows[rows - 1 - j] = pixels + j * cols; /* Spectrogram */ if (p->normalize) /* values are already in dB, so we subtract the maximum value * (which will normally be negative) to raise the maximum to 0.0. */ autogain = -p->max; for (k = 0; k < chans; ++k) { priv_t *q = (effp - effp->flow + k)->priv; if (p->normalize) { float *fp = q->dBfs; for (i = p->rows * p->cols; i > 0; i--) *fp++ += autogain; } base = !p->raw * below + (chans - 1 - k) * (p->rows + 1); for (j = 0; j < p->rows; ++j) { for (i = 0; i < p->cols; ++i) pixel(!p->raw * left + i, base + j) = colour(p, q->dBfs[i*p->rows + j]); if (!p->raw && !p->no_axes) /* Y-axis lines */ pixel(left - 1, base + j) = pixel(left + p->cols, base + j) = Grid; } if (!p->raw && !p->no_axes) /* X-axis lines */ for (i = -1; i <= p->cols; ++i) pixel(left + i, base - 1) = pixel(left + i, base + p->rows) = Grid; } if (!p->raw) { if (p->title && (i = strlen(p->title) * font_X) < cols + 1) /* Title */ print_at((cols - i) / 2, rows - font_y, Text, p->title); if (strlen(p->comment) * font_X < cols + 1) /* Footer comment */ print_at(1, font_y, Text, p->comment); /* X-axis */ step = axis(secs(p->cols), p->cols / (font_X * 9 / 2), &limit, &prefix); sprintf(text, "Time (%.1ss)", prefix); /* Axis label */ print_at(left + (p->cols - font_X * strlen(text)) / 2, 24, Text, text); for (i = 0; i <= limit; i += step) { int x = limit ? (double)i / limit * p->cols + .5 : 0; int y; for (y = 0; y < tick_len; ++y) /* Ticks */ pixel(left-1+x, below-1-y) = pixel(left-1+x, below+c_rows+y) = Grid; if (step == 5 && (i%10)) continue; sprintf(text, "%g", .1 * i); /* Tick labels */ x = left + x - 3 * strlen(text); print_at(x, below - 6, Labels, text); print_at(x, below + c_rows + 14, Labels, text); } /* Y-axis */ step = axis(effp->in_signal.rate / 2, (p->rows - 1) / ((font_y * 3 + 1) >> 1), &limit, &prefix); sprintf(text, "Frequency (%.1sHz)", prefix); /* Axis label */ print_up(10, below + (c_rows - font_X * strlen(text)) / 2, Text, text); for (k = 0; k < chans; ++k) { base = below + k * (p->rows + 1); for (i = 0; i <= limit; i += step) { int y = limit ? (double)i / limit * (p->rows - 1) + .5 : 0; int x; for (x = 0; x < tick_len; ++x) /* Ticks */ pixel(left-1-x, base+y) = pixel(left+p->cols+x, base+y) = Grid; if ((step == 5 && (i%10)) || (!i && k && chans > 1)) continue; sprintf(text, i?"%5g":" DC", .1 * i); /* Tick labels */ print_at(left - 4 - font_X * 5, base + y + 5, Labels, text); sprintf(text, i?"%g":"DC", .1 * i); print_at(left + p->cols + 6, base + y + 5, Labels, text); } } /* Z-axis */ k = min(400, c_rows); base = below + (c_rows - k) / 2; print_at(cols - right - 2 - font_X, base - 13, Text, "dBFS");/* Axis label */ for (j = 0; j < k; ++j) { /* Spectrum */ png_byte b = colour(p, p->dB_range * (j / (k - 1.) - 1)); for (i = 0; i < spectrum_width; ++i) pixel(cols - right - 1 - i, base + j) = b; } step = 10 * ceil(p->dB_range / 10. * (font_y + 2) / (k - 1)); for (i = 0; i <= p->dB_range; i += step) { /* (Tick) labels */ int y = (double)i / p->dB_range * (k - 1) + .5; sprintf(text, "%+i", i - p->gain - p->dB_range - (int)(autogain+0.5)); print_at(cols - right + 1, base + y + 5, Labels, text); } } free(font); png_set_rows(png, png_info, png_rows); png_write_png(png, png_info, PNG_TRANSFORM_IDENTITY, NULL); if (!p->using_stdout) fclose(file); error: png_destroy_write_struct(&png, &png_info); free(png_rows); free(pixels); free(p->dBfs); free(p->buf); free(p->dft_buf); free(p->window); free(p->magnitudes); return SOX_SUCCESS; } static int end(sox_effect_t *effp) { priv_t *p = effp->priv; if (effp->flow == 0) return stop(effp); free(p->dBfs); return SOX_SUCCESS; } const sox_effect_handler_t *lsx_spectrogram_effect_fn(void) { static sox_effect_handler_t handler = { "spectrogram", 0, SOX_EFF_MODIFY, getopts, start, flow, drain, end, 0, sizeof(priv_t) }; static const char *lines[] = { "[options]", "\t-x num\tX-axis size in pixels; default derived or 800", "\t-X num\tX-axis pixels/second; default derived or 100", "\t-y num\tY-axis size in pixels (per channel); slow if not 1 + 2^n", "\t-Y num\tY-height total (i.e. not per channel); default 550", "\t-z num\tZ-axis range in dB; default 120", "\t-Z num\tZ-axis maximum in dBFS; default 0", "\t-n\tSet Z-axis maximum to the brightest pixel", "\t-q num\tZ-axis quantisation (0 - 249); default 249", "\t-w name\tWindow: Hann(default)/Hamming/Bartlett/Rectangular/Kaiser/Dolph", "\t-W num\tWindow adjust parameter (-10 - 10); applies only to Kaiser/Dolph", "\t-s\tSlack overlap of windows", "\t-a\tSuppress axis lines", "\t-r\tRaw spectrogram; no axes or legends", "\t-l\tLight background", "\t-m\tMonochrome", "\t-h\tHigh colour", "\t-p num\tPermute colours (1 - 6); default 1", "\t-A\tAlternative, inferior, fixed colour-set (for compatibility only)", "\t-t text\tTitle text", "\t-c text\tComment text", "\t-o text\tOutput file name; default `spectrogram.png'", "\t-d time\tAudio duration to fit to X-axis; e.g. 1:00, 48", "\t-S position\tStart the spectrogram at the given input position", }; static char *usage; handler.usage = lsx_usage_lines(&usage, lines, array_length(lines)); return &handler; }