123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414 |
- #include "darknet.h"
- #include <math.h>
- // ./darknet nightmare cfg/extractor.recon.cfg ~/trained/yolo-coco.conv frame6.png -reconstruct -iters 500 -i 3 -lambda .1 -rate .01 -smooth 2
/*
 * Mean of the absolute values of the first n elements of x.
 *
 * x: array of at least n floats
 * n: number of elements to average
 *
 * Returns 0 when n <= 0, so an empty array yields a usable number instead of
 * the NaN that dividing a zero sum by zero would produce.
 */
float abs_mean(float *x, int n)
{
    if (n <= 0) return 0;

    float sum = 0;
    for (int i = 0; i < n; ++i) {
        /* fabsf keeps the arithmetic in float, matching the element type. */
        sum += fabsf(x[i]);
    }
    return sum / n;
}
/*
 * Threshold delta against the statistics of output, in place.
 *
 * The cutoff is mean(output) + thresh * sqrt(variance(output)). Every
 * delta[i] above the cutoff is replaced by output[i]; every other entry is
 * set to 0.
 *
 * output: n values the statistics are taken from
 * delta:  n values, overwritten; may be the same array as output
 * n:      number of elements
 * thresh: multiplier applied to sqrt(variance)
 */
void calculate_loss(float *output, float *delta, int n, float thresh)
{
    float mean = mean_array(output, n);
    float var = variance_array(output, n);

    /* Same double-precision expression as the per-element comparison used,
     * evaluated once because none of its inputs change inside the loop. */
    double cutoff = mean + thresh*sqrt(var);

    int idx = 0;
    while (idx < n) {
        /* Read output[idx] only after the comparison and before the store,
         * so the result is unchanged when output and delta alias. */
        float kept = 0;
        if (delta[idx] > cutoff) kept = output[idx];
        delta[idx] = kept;
        ++idx;
    }
}
- void optimize_picture(network *net, image orig, int max_layer, float scale, float rate, float thresh, int norm)
- {
- //scale_image(orig, 2);
- //translate_image(orig, -1);
- net->n = max_layer + 1;
- int dx = rand()%16 - 8;
- int dy = rand()%16 - 8;
- int flip = rand()%2;
- image crop = crop_image(orig, dx, dy, orig.w, orig.h);
- image im = resize_image(crop, (int)(orig.w * scale), (int)(orig.h * scale));
- if(flip) flip_image(im);
- resize_network(net, im.w, im.h);
- layer last = net->layers[net->n-1];
- //net->layers[net->n - 1].activation = LINEAR;
- image delta = make_image(im.w, im.h, im.c);
- #ifdef GPU
- net->delta_gpu = cuda_make_array(delta.data, im.w*im.h*im.c);
- copy_cpu(net->inputs, im.data, 1, net->input, 1);
- forward_network_gpu(net);
- copy_gpu(last.outputs, last.output_gpu, 1, last.delta_gpu, 1);
- cuda_pull_array(last.delta_gpu, last.delta, last.outputs);
- calculate_loss(last.delta, last.delta, last.outputs, thresh);
- cuda_push_array(last.delta_gpu, last.delta, last.outputs);
- backward_network_gpu(net);
- cuda_pull_array(net->delta_gpu, delta.data, im.w*im.h*im.c);
- cuda_free(net->delta_gpu);
- net->delta_gpu = 0;
- #else
- printf("\nnet: %d %d %d im: %d %d %d\n", net->w, net->h, net->inputs, im.w, im.h, im.c);
- copy_cpu(net->inputs, im.data, 1, net->input, 1);
- net->delta = delta.data;
- forward_network(net);
- copy_cpu(last.outputs, last.output, 1, last.delta, 1);
- calculate_loss(last.output, last.delta, last.outputs, thresh);
- backward_network(net);
- #endif
- if(flip) flip_image(delta);
- //normalize_array(delta.data, delta.w*delta.h*delta.c);
- image resized = resize_image(delta, orig.w, orig.h);
- image out = crop_image(resized, -dx, -dy, orig.w, orig.h);
- /*
- image g = grayscale_image(out);
- free_image(out);
- out = g;
- */
- //rate = rate / abs_mean(out.data, out.w*out.h*out.c);
- image gray = make_image(out.w, out.h, out.c);
- fill_image(gray, .5);
- axpy_cpu(orig.w*orig.h*orig.c, -1, orig.data, 1, gray.data, 1);
- axpy_cpu(orig.w*orig.h*orig.c, .1, gray.data, 1, out.data, 1);
- if(norm) normalize_array(out.data, out.w*out.h*out.c);
- axpy_cpu(orig.w*orig.h*orig.c, rate, out.data, 1, orig.data, 1);
- /*
- normalize_array(orig.data, orig.w*orig.h*orig.c);
- scale_image(orig, sqrt(var));
- translate_image(orig, mean);
- */
- //translate_image(orig, 1);
- //scale_image(orig, .5);
- //normalize_image(orig);
- constrain_image(orig);
- free_image(crop);
- free_image(im);
- free_image(delta);
- free_image(resized);
- free_image(out);
- }
- void smooth(image recon, image update, float lambda, int num)
- {
- int i, j, k;
- int ii, jj;
- for(k = 0; k < recon.c; ++k){
- for(j = 0; j < recon.h; ++j){
- for(i = 0; i < recon.w; ++i){
- int out_index = i + recon.w*(j + recon.h*k);
- for(jj = j-num; jj <= j + num && jj < recon.h; ++jj){
- if (jj < 0) continue;
- for(ii = i-num; ii <= i + num && ii < recon.w; ++ii){
- if (ii < 0) continue;
- int in_index = ii + recon.w*(jj + recon.h*k);
- update.data[out_index] += lambda * (recon.data[in_index] - recon.data[out_index]);
- }
- }
- }
- }
- }
- }
- void reconstruct_picture(network *net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters)
- {
- int iter = 0;
- for (iter = 0; iter < iters; ++iter) {
- image delta = make_image(recon.w, recon.h, recon.c);
- #ifdef GPU
- layer l = get_network_output_layer(net);
- cuda_push_array(net->input_gpu, recon.data, recon.w*recon.h*recon.c);
- //cuda_push_array(net->truth_gpu, features, net->truths);
- net->delta_gpu = cuda_make_array(delta.data, delta.w*delta.h*delta.c);
- forward_network_gpu(net);
- cuda_push_array(l.delta_gpu, features, l.outputs);
- axpy_gpu(l.outputs, -1, l.output_gpu, 1, l.delta_gpu, 1);
- backward_network_gpu(net);
- cuda_pull_array(net->delta_gpu, delta.data, delta.w*delta.h*delta.c);
- cuda_free(net->delta_gpu);
- #else
- net->input = recon.data;
- net->delta = delta.data;
- net->truth = features;
- forward_network(net);
- backward_network(net);
- #endif
- //normalize_array(delta.data, delta.w*delta.h*delta.c);
- axpy_cpu(recon.w*recon.h*recon.c, 1, delta.data, 1, update.data, 1);
- //smooth(recon, update, lambda, smooth_size);
- axpy_cpu(recon.w*recon.h*recon.c, rate, update.data, 1, recon.data, 1);
- scal_cpu(recon.w*recon.h*recon.c, momentum, update.data, 1);
- float mag = mag_array(delta.data, recon.w*recon.h*recon.c);
- printf("mag: %f\n", mag);
- //scal_cpu(recon.w*recon.h*recon.c, 600/mag, recon.data, 1);
- constrain_image(recon);
- free_image(delta);
- }
- }
- /*
- void run_lsd(int argc, char **argv)
- {
- srand(0);
- if(argc < 3){
- fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [options! (optional)]\n", argv[0], argv[1]);
- return;
- }
- char *cfg = argv[2];
- char *weights = argv[3];
- char *input = argv[4];
- int norm = find_int_arg(argc, argv, "-norm", 1);
- int rounds = find_int_arg(argc, argv, "-rounds", 1);
- int iters = find_int_arg(argc, argv, "-iters", 10);
- float rate = find_float_arg(argc, argv, "-rate", .04);
- float momentum = find_float_arg(argc, argv, "-momentum", .9);
- float lambda = find_float_arg(argc, argv, "-lambda", .01);
- char *prefix = find_char_arg(argc, argv, "-prefix", 0);
- int reconstruct = find_arg(argc, argv, "-reconstruct");
- int smooth_size = find_int_arg(argc, argv, "-smooth", 1);
- network net = parse_network_cfg(cfg);
- load_weights(&net, weights);
- char *cfgbase = basecfg(cfg);
- char *imbase = basecfg(input);
- set_batch_network(&net, 1);
- image im = load_image_color(input, 0, 0);
- float *features = 0;
- image update;
- if (reconstruct){
- im = letterbox_image(im, net->w, net->h);
- int zz = 0;
- network_predict(net, im.data);
- image out_im = get_network_image(net);
- image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz);
- //flip_image(crop);
- image f_im = resize_image(crop, out_im.w, out_im.h);
- free_image(crop);
- printf("%d features\n", out_im.w*out_im.h*out_im.c);
- im = resize_image(im, im.w, im.h);
- f_im = resize_image(f_im, f_im.w, f_im.h);
- features = f_im.data;
- int i;
- for(i = 0; i < 14*14*512; ++i){
- features[i] += rand_uniform(-.19, .19);
- }
- free_image(im);
- im = make_random_image(im.w, im.h, im.c);
- update = make_image(im.w, im.h, im.c);
- }
- int e;
- int n;
- for(e = 0; e < rounds; ++e){
- fprintf(stderr, "Iteration: ");
- fflush(stderr);
- for(n = 0; n < iters; ++n){
- fprintf(stderr, "%d, ", n);
- fflush(stderr);
- if(reconstruct){
- reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1);
- //if ((n+1)%30 == 0) rate *= .5;
- show_image(im, "reconstruction");
- #ifdef OPENCV
- cvWaitKey(10);
- #endif
- }else{
- int layer = max_layer + rand()%range - range/2;
- int octave = rand()%octaves;
- optimize_picture(&net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm);
- }
- }
- fprintf(stderr, "done\n");
- char buff[256];
- if (prefix){
- sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e);
- }else{
- sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e);
- }
- printf("%d %s\n", e, buff);
- save_image(im, buff);
- //show_image(im, buff);
- //cvWaitKey(0);
- if(rotate){
- image rot = rotate_image(im, rotate);
- free_image(im);
- im = rot;
- }
- image crop = crop_image(im, im.w * (1. - zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom);
- image resized = resize_image(crop, im.w, im.h);
- free_image(im);
- free_image(crop);
- im = resized;
- }
- }
- */
- void run_nightmare(int argc, char **argv)
- {
- srand(0);
- if(argc < 4){
- fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [layer] [options! (optional)]\n", argv[0], argv[1]);
- return;
- }
- char *cfg = argv[2];
- char *weights = argv[3];
- char *input = argv[4];
- int max_layer = atoi(argv[5]);
- int range = find_int_arg(argc, argv, "-range", 1);
- int norm = find_int_arg(argc, argv, "-norm", 1);
- int rounds = find_int_arg(argc, argv, "-rounds", 1);
- int iters = find_int_arg(argc, argv, "-iters", 10);
- int octaves = find_int_arg(argc, argv, "-octaves", 4);
- float zoom = find_float_arg(argc, argv, "-zoom", 1.);
- float rate = find_float_arg(argc, argv, "-rate", .04);
- float thresh = find_float_arg(argc, argv, "-thresh", 1.);
- float rotate = find_float_arg(argc, argv, "-rotate", 0);
- float momentum = find_float_arg(argc, argv, "-momentum", .9);
- float lambda = find_float_arg(argc, argv, "-lambda", .01);
- char *prefix = find_char_arg(argc, argv, "-prefix", 0);
- int reconstruct = find_arg(argc, argv, "-reconstruct");
- int smooth_size = find_int_arg(argc, argv, "-smooth", 1);
- network *net = load_network(cfg, weights, 0);
- char *cfgbase = basecfg(cfg);
- char *imbase = basecfg(input);
- set_batch_network(net, 1);
- image im = load_image_color(input, 0, 0);
- if(0){
- float scale = 1;
- if(im.w > 512 || im.h > 512){
- if(im.w > im.h) scale = 512.0/im.w;
- else scale = 512.0/im.h;
- }
- image resized = resize_image(im, scale*im.w, scale*im.h);
- free_image(im);
- im = resized;
- }
- //im = letterbox_image(im, net->w, net->h);
- float *features = 0;
- image update;
- if (reconstruct){
- net->n = max_layer;
- im = letterbox_image(im, net->w, net->h);
- //resize_network(&net, im.w, im.h);
- network_predict(net, im.data);
- if(net->layers[net->n-1].type == REGION){
- printf("region!\n");
- zero_objectness(net->layers[net->n-1]);
- }
- image out_im = copy_image(get_network_image(net));
- /*
- image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz);
- //flip_image(crop);
- image f_im = resize_image(crop, out_im.w, out_im.h);
- free_image(crop);
- */
- printf("%d features\n", out_im.w*out_im.h*out_im.c);
- features = out_im.data;
- /*
- int i;
- for(i = 0; i < 14*14*512; ++i){
- //features[i] += rand_uniform(-.19, .19);
- }
- free_image(im);
- im = make_random_image(im.w, im.h, im.c);
- */
- update = make_image(im.w, im.h, im.c);
- }
- int e;
- int n;
- for(e = 0; e < rounds; ++e){
- fprintf(stderr, "Iteration: ");
- fflush(stderr);
- for(n = 0; n < iters; ++n){
- fprintf(stderr, "%d, ", n);
- fflush(stderr);
- if(reconstruct){
- reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1);
- //if ((n+1)%30 == 0) rate *= .5;
- show_image(im, "reconstruction", 10);
- }else{
- int layer = max_layer + rand()%range - range/2;
- int octave = rand()%octaves;
- optimize_picture(net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm);
- }
- }
- fprintf(stderr, "done\n");
- if(0){
- image g = grayscale_image(im);
- free_image(im);
- im = g;
- }
- char buff[256];
- if (prefix){
- sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e);
- }else{
- sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e);
- }
- printf("%d %s\n", e, buff);
- save_image(im, buff);
- //show_image(im, buff, 0);
- if(rotate){
- image rot = rotate_image(im, rotate);
- free_image(im);
- im = rot;
- }
- image crop = crop_image(im, im.w * (1. - zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom);
- image resized = resize_image(crop, im.w, im.h);
- free_image(im);
- free_image(crop);
- im = resized;
- }
- }
|