/* --*- c -*-- * Copyright (C) 2016 Enrico Scholz * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_ASM_CACHECTL_H # include #endif #include "src/bayer2rgb.h" #include "src/bayer2rgb-cmdline.h" #include "src/compiler.h" #define BPP_TO_BYTE(_bpp) (((_bpp) + 7) / 8) struct timing_info { /* time when application was started */ struct timespec start; /* time when data was initialized (e.g. buffers are allocated and * input image was read) */ struct timespec init; /* time when all conversions have been */ struct timespec conversion_end; /* time when application has been (nearly) finished; only timing * information will be printed */ struct timespec end; uint64_t flush_time_ns; uint64_t convert_time_ns; }; struct x_image_in { struct image_in image; void const *memory; size_t total_size; }; struct x_image_out { struct image_out image; void const *memory; size_t total_size; }; inline static _always_inline_ bool have_neon(void) { #ifdef HAVE_NEON return true; #else return false; #endif } inline static _always_inline_ bool have_cplusplus() { #ifdef HAVE_CPLUSPLUS return true; #else return false; #endif } static void dump_image_info(struct image_info const *info, size_t total_size) { printf("bpp=%2u, dim=%ux%u, stride=%zu, memsz=%zu (%zux%zu)", info->bpp, info->w, info->h, info->stride, total_size, info->stride / BPP_TO_BYTE(info->bpp), total_size / info->stride); } static void dump_image_in_info(struct x_image_in const *img) { struct image_info const *info = &img->image.info; uintptr_t delta = ((uintptr_t)img->image.data - (uintptr_t)img->memory); unsigned int bpp = BPP_TO_BYTE(info->bpp); printf("type=%d, data=BASE+%lu (crop @[%lu,%lu]), info=[", img->image.type, (unsigned long)delta, (unsigned long)((delta % info->stride) / bpp), (unsigned long)((delta / info->stride))); dump_image_info(info, img->total_size); } static void dump_image_out_info(struct x_image_out const *img) { struct image_info const *info = &img->image.info; uintptr_t delta = ((uintptr_t)img->image.data - (uintptr_t)img->memory); unsigned int bpp = BPP_TO_BYTE(info->bpp); if (info->stride == 0) abort(); printf(" data=BASE+%lu (crop @[%lu,%lu]), info=[", (unsigned long)delta, (unsigned long)((delta % info->stride) / bpp), (unsigned long)((delta / info->stride))); dump_image_info(info, img->total_size); } static uint64_t timespec_to_ns(struct timespec const *t) { uint64_t res = t->tv_sec; res *= 1000000000ull; res += t->tv_nsec; return res; } static void add_time(uint64_t *ns, struct timespec const *end, struct timespec const *start) { *ns += timespec_to_ns(end) - timespec_to_ns(start); } static char const *fmt_ns(char *buf, uint64_t ns) { sprintf(buf, "%ld.%09ld", (unsigned long)(ns / 1000000000ull), (unsigned long)(ns % 1000000000ull)); return buf; } static char const *fmt_time_delta(char *buf, struct timespec const *end, struct timespec const *start) { return fmt_ns(buf, timespec_to_ns(end) - timespec_to_ns(start)); } static void write_ppm(int fd, struct x_image_out const *image) { struct image_info const *info = &image->image.info; size_t w = info->stride / BPP_TO_BYTE(info->bpp); size_t h = image->total_size / info->stride; size_t cnt = w * h; FILE *f; f = fdopen(fd, "w"); fprintf(f, "P3\n%zu %zu 255\n", w, h); switch (info->bpp) { case 32: { typedef struct rgbx32_pixel rgb_pixel_t; rgb_pixel_t const *data = image->memory; for (size_t i = 0; i < cnt; ++i, ++data) { char const *delim = ((i + 1) % 4 && i+1 != cnt) ? " " : "\n"; fprintf(f, "%-3d %-3d %-3d%s", data->r, data->g, data->b, delim); } break; } default: /* todo */ abort(); } fclose(f); } static bool write_all(int fd, void const *src, size_t len) { while (len > 0) { ssize_t l = write(fd, src, len); if (l > 0) { src += l; len -= l; } else if (l == 0) { fprintf(stderr, "failed to write data"); break; } else if (errno == EINTR) { continue; } else { perror("write()"); break; } } return len == 0; } int main(int argc, char *argv[]) { struct gengetopt_args_info args; int fd_in = -1; int fd_out = -1; size_t img_size; struct x_image_in image_in; struct x_image_out image_out; void *data_in; void *data_out; ssize_t l; unsigned int in_bpp; unsigned int in_endian; unsigned int out_bpp; unsigned long quality; struct image_conversion_info conv_info = {}; clockid_t const clk = CLOCK_PROCESS_CPUTIME_ID; struct timing_info timing = { .flush_time_ns = 0, }; void (*convert_fn)(struct image_in const *input, struct image_out const *output, struct image_conversion_info *info); clock_gettime(clk, &timing.start); if (cmdline_parser(argc, argv, &args) != 0) return EX_USAGE; if (!args.input_crop_w_given) args.input_crop_w_arg = (args.width_arg - args.input_crop_x_arg); if (!args.input_crop_h_given) args.input_crop_h_arg = (args.height_arg - args.input_crop_y_arg); if (!args.output_width_given) args.output_width_arg = args.input_crop_w_arg; if (!args.output_height_given) args.output_height_arg = args.input_crop_h_arg; if (!args.output_window_w_given) args.output_window_w_arg = (args.output_width_arg - args.output_window_x_arg); if (!args.output_window_h_given) args.output_window_h_arg = (args.output_height_arg - args.output_window_y_arg); if (!args.timing_given && args.loop_given) args.timing_flag = true; quality = 0; for (size_t i = 0; i < args.quality_given; ++i) { switch (args.quality_arg[i]) { case quality_arg_PLUS_all: quality = ~0ul; break; case quality_arg_MINUS_all: quality = 0; break; case quality_arg_PLUS_round: quality |= ((1u << QUALITY_ROUND_4) | (1u << QUALITY_ROUND_2)); break; case quality_arg_PLUS_round4: quality |= (1u << QUALITY_ROUND_4); break; case quality_arg_PLUS_round2: quality |= (1u << QUALITY_ROUND_2); break; case quality_arg_MINUS_round: quality &= ~((1u << QUALITY_ROUND_4) | (1u << QUALITY_ROUND_2)); break; case quality_arg_MINUS_round4: quality &= ~(1u << QUALITY_ROUND_4); break; case quality_arg_MINUS_round2: quality &= ~(1u << QUALITY_ROUND_2); break; case quality__NULL: abort(); } } convert_fn = NULL; switch (args.method_arg) { case method_arg_neon: if (have_neon()) convert_fn = bayer2rgb_convert_neon; break; case method_arg_cMINUS_opt: convert_fn = bayer2rgb_convert_c_opt; break; case method_arg_cc: if (have_cplusplus()) convert_fn = bayer2rgb_convert_cc; break; case method__NULL: abort(); } switch (args.input_endian_arg) { case input_endian_arg_le: in_endian = BAYER_E_LITTLE; break; case input_endian_arg_be: in_endian = BAYER_E_BIG; break; case input_endian_arg_native: if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) in_endian = BAYER_E_LITTLE; else in_endian = BAYER_E_BIG; break; case input_endian_arg_foreign: if (__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__) in_endian = BAYER_E_LITTLE; else in_endian = BAYER_E_BIG; break; case input_endian__NULL: default: abort(); } if (!convert_fn) { if (!args.silent_flag) fprintf(stderr, "WARNING: optimization method not available; falling back to c-opt\n"); convert_fn = bayer2rgb_convert_c_opt; } if (args.input_crop_w_arg > args.width_arg) { fprintf(stderr, "invalid input crop width\n"); return EX_USAGE; } if (args.input_crop_h_arg > args.height_arg) { fprintf(stderr, "invalid input crop heigth\n"); return EX_USAGE; } if (args.output_window_w_arg > args.output_width_arg) { fprintf(stderr, "invalid output window width\n"); return EX_USAGE; } if (args.output_window_h_arg > args.output_height_arg) { fprintf(stderr, "invalid output window height\n"); return EX_USAGE; } in_bpp = args.input_bpp_arg; if (!args.output_bpp_given && args.output_format_arg == output_format_arg_rgb16) out_bpp = 16; else out_bpp = args.output_bpp_arg; if (args.output_window_w_arg <= 0 || args.output_window_h_arg <= 0 || args.output_width_arg <= 0 || args.output_height_arg <= 0 || args.width_arg <= 0 || args.height_arg <= 0) { fprintf(stderr, "invalid dimension (in: %dx%d[%dx%d], out: %dx%d[%dx%d])\n", args.width_arg, args.height_arg, args.input_crop_w_arg, args.input_crop_h_arg, args.output_width_arg, args.output_height_arg, args.output_window_w_arg, args.output_window_h_arg); return EX_USAGE; } if (args.inputs_num < 1) fd_in = STDIN_FILENO; else fd_in = open(args.inputs[0], O_RDONLY|O_CLOEXEC); if (fd_in < 0) { perror("open()"); return EX_NOINPUT; } if (args.inputs_num < 2) fd_out = STDOUT_FILENO; else fd_out = open(args.inputs[1], O_WRONLY|O_CREAT|O_CLOEXEC|O_TRUNC, 0666); if (fd_out < 0) { perror("open()"); close(fd_in); return EX_NOINPUT; } img_size = args.width_arg * args.height_arg * BPP_TO_BYTE(in_bpp); data_in = malloc(img_size); if (!data_in) abort(); l = read(fd_in, data_in, img_size); if (l < 0) { perror("read()"); return EX_DATAERR; } if ((size_t)l != img_size) { fprintf(stderr, "failed to read all image data\n"); return EX_DATAERR; } image_in = (struct x_image_in) { .image = { .data = ((uint8_t *)data_in + (args.width_arg * args.input_crop_y_arg + args.input_crop_x_arg) * BPP_TO_BYTE(in_bpp)), .info = { .bpp = in_bpp, .endian = in_endian, .w = args.input_crop_w_arg, .h = args.input_crop_h_arg, .stride = args.width_arg * BPP_TO_BYTE(in_bpp), }, }, .memory = data_in, .total_size = img_size, }; switch (args.input_format_arg) { case input_format_arg_gbrg: image_in.image.type = BAYER_GBRG; break; case input_format_arg_grbg: image_in.image.type = BAYER_GRBG; break; case input_format_arg_bggr: image_in.image.type = BAYER_BGGR; break; case input_format_arg_rggb: image_in.image.type = BAYER_RGGB; break; default: abort(); } img_size = (args.output_width_arg * args.output_height_arg * BPP_TO_BYTE(out_bpp)); data_out = malloc(img_size); if (!data_out) abort(); memset(data_out, 0x42, img_size); image_out = (struct x_image_out) { .image = { .data = (data_out + (args.output_width_arg * args.output_window_y_arg + args.output_window_x_arg) * BPP_TO_BYTE(out_bpp)), .quality= quality, .info = { .bpp = out_bpp, .w = args.output_window_w_arg, .h = args.output_window_h_arg, .stride = (args.output_width_arg * BPP_TO_BYTE(out_bpp)), }, }, .memory = data_out, .total_size = img_size, }; switch (args.output_format_arg) { case output_format_arg_rgb: case output_format_arg_rgbx: case output_format_arg_ppm: image_out.image.type = RGB_FMT_RGBx; break; case output_format_arg_bgrx: image_out.image.type = RGB_FMT_BGRx; break; case output_format_arg_xbgr: image_out.image.type = RGB_FMT_xBGR; break; case output_format_arg_xrgb: image_out.image.type = RGB_FMT_xRGB; break; case output_format_arg_rgb16: image_out.image.type = RGB_FMT_RGB16; break; default: abort(); } clock_gettime(clk, &timing.init); if (args.dump_info_flag) { printf("INPUT "); dump_image_in_info(&image_in); printf("\n"); printf("OUTPUT "); dump_image_out_info(&image_out); printf("\n"); goto out; } for (unsigned int cnt = args.loop_arg; cnt > 0; --cnt) { if (args.flush_cache_flag) { struct timespec a; struct timespec b; clock_gettime(clk, &a); /* TODO */ #ifdef HAVE_ASM_CACHECTL_H cacheflush(image_in.info.memory, image_in.info.total_size); cacheflush(image_out.info.memory, image_out.info.total_size); #endif clock_gettime(clk, &b); add_time(&timing.flush_time_ns, &b, &a); } { struct timespec a; struct timespec b; clock_gettime(clk, &a); convert_fn(&image_in.image, &image_out.image, args.loop_arg <= 1 ? &conv_info : NULL); clock_gettime(clk, &b); add_time(&timing.convert_time_ns, &b, &a); } } clock_gettime(clk, &timing.conversion_end); if (!args.silent_flag && conv_info.fallback_reason) fprintf(stderr, "NOTE: conversion falled back due to '%s'\n", conv_info.fallback_reason); switch (args.output_format_arg) { case output_format_arg_rgb16: case output_format_arg_rgb: case output_format_arg_rgbx: case output_format_arg_bgrx: case output_format_arg_xrgb: case output_format_arg_xbgr: if (!write_all(fd_out, image_out.memory, image_out.total_size)) return EX_OSERR; break; case output_format_arg_ppm: write_ppm(fd_out, &image_out); break; case output_format__NULL: abort(); } clock_gettime(clk, &timing.end); out: if (args.timing_flag) { char tmp_buf[64]; /* NOTE: do not merge the printf() statements; they can not * use a shared tmp_buf[]! */ printf("Total execution time: %s\n", fmt_time_delta(tmp_buf, &timing.end, &timing.start)); printf("Total conversion time: %s %.1f fps\n", fmt_time_delta(tmp_buf, &timing.conversion_end, &timing.init), (float)(args.loop_arg) * 1000000000. / (timespec_to_ns(&timing.conversion_end) - timespec_to_ns(&timing.init))); printf(" Raw conversion time: %s %.1f fps\n", fmt_ns(tmp_buf, timing.convert_time_ns), (float)(args.loop_arg) * 1000000000. / timing.convert_time_ns); printf(" Cache flish time: %s\n", fmt_ns(tmp_buf, timing.flush_time_ns)); printf("Initialization time: %s\n", fmt_time_delta(tmp_buf, &timing.init, &timing.start)); printf("Output time: %s\n", fmt_time_delta(tmp_buf, &timing.end, &timing.conversion_end)); } return EX_OK; }