Kaynağa Gözat

Added experimental overwrite files

Toby Chui 3 yıl önce
işleme
f43823a61e
5 değiştirilmiş dosya ile 4225 ekleme ve 0 silme
  1. 3 0
      README.md
  2. 1098 0
      examples/classifier.c
  3. 850 0
      examples/detector.c
  4. 806 0
      include/darknet.h
  5. 1468 0
      src/image.c

+ 3 - 0
README.md

@@ -0,0 +1,3 @@
+# Darknet-mod
+
+Copy the contents of this repo over the original Darknet neural network repo to get the modified version used by the ArozOS system.

+ 1098 - 0
examples/classifier.c

@@ -0,0 +1,1098 @@
+#include "darknet.h"
+
+#include <sys/time.h>
+#include <assert.h>
+
+/*
+ * Parse the numeric part of "<name> <value>" labels.
+ *
+ * Each labels[i] is split in place at its first space (the space is
+ * overwritten with '\0', so labels[i] keeps only the name part) and the text
+ * after it is converted with atof().
+ *
+ * labels: array of n writable C strings.
+ * n:      number of entries in labels.
+ *
+ * Returns a calloc'd array of n floats that the caller must free(), or NULL
+ * if the allocation fails. Entries whose label is NULL or contains no space
+ * are left at 0 instead of dereferencing the NULL returned by strchr().
+ */
+float *get_regression_values(char **labels, int n)
+{
+    float *v = calloc(n, sizeof(float));
+    int i;
+    if(!v) return 0;
+    for(i = 0; i < n; ++i){
+        char *p = labels[i] ? strchr(labels[i], ' ') : 0;
+        /* No separator means there is no value to parse: keep the zero. */
+        if(!p) continue;
+        *p = 0;
+        v[i] = atof(p+1);
+    }
+    return v;
+}
+
+/*
+ * Train a classifier network.
+ *
+ * datacfg:    data config file; the keys read here are "backup", "tag",
+ *             "labels", "train", "tree" and "classes".
+ * cfgfile:    network config; its base name is used for the saved weights.
+ * weightfile: weights passed to load_network() for every GPU.
+ * gpus/ngpus: device ids and their count; one network is loaded per entry,
+ *             each after re-seeding rand() with the same seed, and each
+ *             network's learning rate is multiplied by ngpus.
+ * clear:      forwarded to load_network().
+ *
+ * Batches are loaded on a background thread while the previous batch trains.
+ * Weights are written to "<backup>/<base>_<epoch>.weights" whenever the epoch
+ * counter advances, to "<backup>/<base>.backup" every 1000 batches, and to
+ * "<backup>/<base>.weights" when training stops.
+ *
+ * Output paths are built with snprintf() so that a long backup directory or
+ * config name is truncated instead of overflowing the 256-byte buffer.
+ */
+void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
+{
+    int i;
+
+    float avg_loss = -1;
+    char *base = basecfg(cfgfile);
+    printf("%s\n", base);
+    printf("%d\n", ngpus);
+    network **nets = calloc(ngpus, sizeof(network*));
+
+    srand(time(0));
+    int seed = rand();
+    for(i = 0; i < ngpus; ++i){
+        /* Same seed before every load so all replicas start identically. */
+        srand(seed);
+#ifdef GPU
+        cuda_set_device(gpus[i]);
+#endif
+        nets[i] = load_network(cfgfile, weightfile, clear);
+        nets[i]->learning_rate *= ngpus;
+    }
+    srand(time(0));
+    network *net = nets[0];
+
+    int imgs = net->batch * net->subdivisions * ngpus;
+
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
+    list *options = read_data_cfg(datacfg);
+
+    char *backup_directory = option_find_str(options, "backup", "/backup/");
+    int tag = option_find_int_quiet(options, "tag", 0);
+    char *label_list = option_find_str(options, "labels", "data/labels.list");
+    char *train_list = option_find_str(options, "train", "data/train.list");
+    char *tree = option_find_str(options, "tree", 0);
+    if (tree) net->hierarchy = read_tree(tree);
+    int classes = option_find_int(options, "classes", 2);
+
+    char **labels = 0;
+    if(!tag){
+        labels = get_labels(label_list);
+    }
+    list *plist = get_paths(train_list);
+    char **paths = (char **)list_to_array(plist);
+    printf("%d\n", plist->size);
+    int N = plist->size;
+    double time;
+
+    load_args args = {0};
+    args.w = net->w;
+    args.h = net->h;
+    args.threads = 32;
+    args.hierarchy = net->hierarchy;
+
+    args.min = net->min_ratio*net->w;
+    args.max = net->max_ratio*net->w;
+    printf("%d %d\n", args.min, args.max);
+    args.angle = net->angle;
+    args.aspect = net->aspect;
+    args.exposure = net->exposure;
+    args.saturation = net->saturation;
+    args.hue = net->hue;
+    args.size = net->w;
+
+    args.paths = paths;
+    args.classes = classes;
+    args.n = imgs;
+    args.m = N;
+    args.labels = labels;
+    if (tag){
+        args.type = TAG_DATA;
+    } else {
+        args.type = CLASSIFICATION_DATA;
+    }
+
+    data train;
+    data buffer;
+    pthread_t load_thread;
+    args.d = &buffer;
+    load_thread = load_data(args);
+
+    int count = 0;
+    int epoch = (*net->seen)/N;
+    while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
+        if(net->random && count++%40 == 0){
+            /* Pick a new square input size: a multiple of 32 in [128, 448]. */
+            printf("Resizing\n");
+            int dim = (rand() % 11 + 4) * 32;
+            printf("%d\n", dim);
+            args.w = dim;
+            args.h = dim;
+            args.size = dim;
+            args.min = net->min_ratio*dim;
+            args.max = net->max_ratio*dim;
+            printf("%d %d\n", args.min, args.max);
+
+            /* The batch already in flight was loaded at the old size:
+             * wait for it, discard it, and start a load at the new size. */
+            pthread_join(load_thread, 0);
+            train = buffer;
+            free_data(train);
+            load_thread = load_data(args);
+
+            for(i = 0; i < ngpus; ++i){
+                resize_network(nets[i], dim, dim);
+            }
+            net = nets[0];
+        }
+        time = what_time_is_it_now();
+
+        /* Take the finished batch and immediately prefetch the next one. */
+        pthread_join(load_thread, 0);
+        train = buffer;
+        load_thread = load_data(args);
+
+        printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);
+        time = what_time_is_it_now();
+
+        float loss = 0;
+#ifdef GPU
+        if(ngpus == 1){
+            loss = train_network(net, train);
+        } else {
+            loss = train_networks(nets, ngpus, train, 4);
+        }
+#else
+        loss = train_network(net, train);
+#endif
+        /* -1 is the "no loss seen yet" sentinel; afterwards keep a running
+         * average weighted 0.9 old / 0.1 new. */
+        if(avg_loss == -1) avg_loss = loss;
+        avg_loss = avg_loss*.9 + loss*.1;
+        printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen);
+        free_data(train);
+        if(*net->seen/N > epoch){
+            epoch = *net->seen/N;
+            char buff[256];
+            snprintf(buff, sizeof(buff), "%s/%s_%d.weights",backup_directory,base, epoch);
+            save_weights(net, buff);
+        }
+        if(get_current_batch(net)%1000 == 0){
+            char buff[256];
+            snprintf(buff, sizeof(buff), "%s/%s.backup",backup_directory,base);
+            save_weights(net, buff);
+        }
+    }
+    char buff[256];
+    snprintf(buff, sizeof(buff), "%s/%s.weights", backup_directory, base);
+    save_weights(net, buff);
+    pthread_join(load_thread, 0);
+
+    free_network(net);
+    /* Release the pointer array itself; only nets[0] is freed above, as in
+     * the original code. */
+    free(nets);
+    if(labels) free_ptrs((void**)labels, classes);
+    free_ptrs((void**)paths, plist->size);
+    free_list(plist);
+    free(base);
+}
+
+/*
+ * Measure top-1 / top-k accuracy over the list named by the "valid" key of
+ * datacfg, processing it in chunks of roughly 1000 images.
+ *
+ * datacfg:    data config; reads "labels", "valid", "classes" and "top".
+ * filename:   network config passed to load_network().
+ * weightfile: weights passed to load_network().
+ *
+ * Each chunk is loaded on a background thread while the previous one is
+ * evaluated with network_accuracies(); running averages are printed after
+ * every chunk.
+ *
+ * Fixes over the previous version:
+ *  - lists with fewer than 1000 images gave splits == 0 and divided by zero;
+ *    such lists are now evaluated as a single chunk;
+ *  - the size computed for each following chunk is now actually passed to the
+ *    loader (args.n), so chunks that are one image larger than the first are
+ *    loaded completely.
+ */
+void validate_classifier_crop(char *datacfg, char *filename, char *weightfile)
+{
+    int i = 0;
+    network *net = load_network(filename, weightfile, 0);
+    srand(time(0));
+
+    list *options = read_data_cfg(datacfg);
+
+    char *label_list = option_find_str(options, "labels", "data/labels.list");
+    char *valid_list = option_find_str(options, "valid", "data/train.list");
+    int classes = option_find_int(options, "classes", 2);
+    int topk = option_find_int(options, "top", 1);
+
+    char **labels = get_labels(label_list);
+    list *plist = get_paths(valid_list);
+
+    char **paths = (char **)list_to_array(plist);
+    int m = plist->size;
+    free_list(plist);
+
+    clock_t time;
+    float avg_acc = 0;
+    float avg_topk = 0;
+    int splits = m/1000;
+    /* Guard the divisions below: short lists become one chunk. */
+    if(splits < 1) splits = 1;
+    int num = (i+1)*m/splits - i*m/splits;
+
+    data val, buffer;
+
+    load_args args = {0};
+    args.w = net->w;
+    args.h = net->h;
+
+    args.paths = paths;
+    args.classes = classes;
+    args.n = num;
+    args.m = 0;
+    args.labels = labels;
+    args.d = &buffer;
+    args.type = OLD_CLASSIFICATION_DATA;
+
+    pthread_t load_thread = load_data_in_thread(args);
+    for(i = 1; i <= splits; ++i){
+        time=clock();
+
+        pthread_join(load_thread, 0);
+        val = buffer;
+
+        /* Size and start of chunk i, which is prefetched while chunk i-1
+         * (now in val) is being evaluated. */
+        num = (i+1)*m/splits - i*m/splits;
+        char **part = paths+(i*m/splits);
+        if(i != splits){
+            args.paths = part;
+            args.n = num;
+            load_thread = load_data_in_thread(args);
+        }
+        printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time));
+
+        time=clock();
+        float *acc = network_accuracies(net, val, topk);
+        avg_acc += acc[0];
+        avg_topk += acc[1];
+        printf("%d: top 1: %f, top %d: %f, %lf seconds, %d images\n", i, avg_acc/i, topk, avg_topk/i, sec(clock()-time), val.X.rows);
+        free_data(val);
+    }
+}
+
+/*
+ * Ten-crop validation.
+ *
+ * For every path in the "valid" list, the image is loaded 32 pixels larger
+ * than the network input, five w x h crops are taken at offsets
+ * (-32,-32), (32,-32), (0,0), (-32,32), (32,32), flip_image() is applied and
+ * the same five crops are taken again. The ten predictions are summed and
+ * ranked with top_k(); running top-1 / top-k accuracy is printed per image.
+ *
+ * The ground-truth class of an image is the first label that occurs as a
+ * substring of its path (-1 when none does).
+ */
+void validate_classifier_10(char *datacfg, char *filename, char *weightfile)
+{
+    network *net = load_network(filename, weightfile, 0);
+    set_batch_network(net, 1);
+    srand(time(0));
+
+    list *options = read_data_cfg(datacfg);
+
+    char *label_list = option_find_str(options, "labels", "data/labels.list");
+    char *valid_list = option_find_str(options, "valid", "data/train.list");
+    int classes = option_find_int(options, "classes", 2);
+    int topk = option_find_int(options, "top", 1);
+
+    char **labels = get_labels(label_list);
+    list *plist = get_paths(valid_list);
+
+    char **paths = (char **)list_to_array(plist);
+    int total = plist->size;
+    free_list(plist);
+
+    float top1_hits = 0;
+    float topk_hits = 0;
+    int *indexes = calloc(topk, sizeof(int));
+
+    int n;
+    for(n = 0; n < total; ++n){
+        char *path = paths[n];
+
+        /* First label contained in the path wins. */
+        int truth = -1;
+        int c = 0;
+        while(c < classes && truth < 0){
+            if(strstr(path, labels[c])) truth = c;
+            ++c;
+        }
+
+        int w = net->w;
+        int h = net->h;
+        int shift = 32;
+        int dx[5] = {-shift,  shift, 0, -shift, shift};
+        int dy[5] = {-shift, -shift, 0,  shift, shift};
+
+        image im = load_image_color(path, w+shift, h+shift);
+        image crops[10];
+        int k;
+        for(k = 0; k < 10; ++k){
+            /* Crops 5..9 repeat the offsets on the flipped image. */
+            if(k == 5) flip_image(im);
+            crops[k] = crop_image(im, dx[k%5], dy[k%5], w, h);
+        }
+
+        float *sum = calloc(classes, sizeof(float));
+        for(k = 0; k < 10; ++k){
+            float *p = network_predict(net, crops[k].data);
+            if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1, 1);
+            axpy_cpu(classes, 1, p, 1, sum, 1);
+            free_image(crops[k]);
+        }
+        free_image(im);
+        top_k(sum, classes, topk, indexes);
+        free(sum);
+
+        if(indexes[0] == truth) top1_hits += 1;
+        for(k = 0; k < topk; ++k){
+            if(indexes[k] == truth) topk_hits += 1;
+        }
+
+        printf("%d: top 1: %f, top %d: %f\n", n, top1_hits/(n+1), topk, topk_hits/(n+1));
+    }
+}
+
+/*
+ * Whole-image validation.
+ *
+ * Every image in the "valid" list is resized with resize_min() using the
+ * network's configured width, the network is resized to the resulting
+ * dimensions, and a single prediction is ranked with top_k(). Running
+ * top-1 / top-k accuracy is printed after each image.
+ *
+ * The ground-truth class of an image is the first label that occurs as a
+ * substring of its path (-1 when none does).
+ */
+void validate_classifier_full(char *datacfg, char *filename, char *weightfile)
+{
+    network *net = load_network(filename, weightfile, 0);
+    set_batch_network(net, 1);
+    srand(time(0));
+
+    list *options = read_data_cfg(datacfg);
+
+    char *label_list = option_find_str(options, "labels", "data/labels.list");
+    char *valid_list = option_find_str(options, "valid", "data/train.list");
+    int classes = option_find_int(options, "classes", 2);
+    int topk = option_find_int(options, "top", 1);
+
+    char **labels = get_labels(label_list);
+    list *plist = get_paths(valid_list);
+
+    char **paths = (char **)list_to_array(plist);
+    int total = plist->size;
+    free_list(plist);
+
+    float top1_hits = 0;
+    float topk_hits = 0;
+    int *indexes = calloc(topk, sizeof(int));
+
+    /* Captured once, before the network is resized per image. */
+    int size = net->w;
+    int n;
+    for(n = 0; n < total; ++n){
+        char *path = paths[n];
+
+        int truth = -1;
+        int c = 0;
+        while(c < classes && truth < 0){
+            if(strstr(path, labels[c])) truth = c;
+            ++c;
+        }
+
+        image im = load_image_color(path, 0, 0);
+        image resized = resize_min(im, size);
+        resize_network(net, resized.w, resized.h);
+        float *pred = network_predict(net, resized.data);
+        if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1);
+
+        free_image(im);
+        free_image(resized);
+        top_k(pred, classes, topk, indexes);
+
+        if(indexes[0] == truth) top1_hits += 1;
+        int k;
+        for(k = 0; k < topk; ++k){
+            if(indexes[k] == truth) topk_hits += 1;
+        }
+
+        printf("%d: top 1: %f, top %d: %f\n", n, top1_hits/(n+1), topk, topk_hits/(n+1));
+    }
+}
+
+
+/*
+ * Single center-crop validation.
+ *
+ * Every image in the "valid" list is center-cropped to the network input
+ * size, predicted once and ranked with top_k(). For each image the path, the
+ * ground-truth index and the first two prediction values are printed,
+ * followed by the running top-1 / top-k accuracy.
+ *
+ * If the data config has a "leaves" entry it is applied to the network
+ * hierarchy with change_leaves() before validation starts.
+ *
+ * The ground-truth class of an image is the first label that occurs as a
+ * substring of its path (-1 when none does).
+ */
+void validate_classifier_single(char *datacfg, char *filename, char *weightfile)
+{
+    network *net = load_network(filename, weightfile, 0);
+    set_batch_network(net, 1);
+    srand(time(0));
+
+    list *options = read_data_cfg(datacfg);
+
+    char *label_list = option_find_str(options, "labels", "data/labels.list");
+    char *leaf_list = option_find_str(options, "leaves", 0);
+    if(leaf_list) change_leaves(net->hierarchy, leaf_list);
+    char *valid_list = option_find_str(options, "valid", "data/train.list");
+    int classes = option_find_int(options, "classes", 2);
+    int topk = option_find_int(options, "top", 1);
+
+    char **labels = get_labels(label_list);
+    list *plist = get_paths(valid_list);
+
+    char **paths = (char **)list_to_array(plist);
+    int total = plist->size;
+    free_list(plist);
+
+    float top1_hits = 0;
+    float topk_hits = 0;
+    int *indexes = calloc(topk, sizeof(int));
+
+    int n;
+    for(n = 0; n < total; ++n){
+        char *path = paths[n];
+
+        int truth = -1;
+        int c = 0;
+        while(c < classes && truth < 0){
+            if(strstr(path, labels[c])) truth = c;
+            ++c;
+        }
+
+        image im = load_image_color(path, 0, 0);
+        image crop = center_crop_image(im, net->w, net->h);
+        float *pred = network_predict(net, crop.data);
+        if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1);
+
+        free_image(im);
+        free_image(crop);
+        top_k(pred, classes, topk, indexes);
+
+        if(indexes[0] == truth) top1_hits += 1;
+        int k;
+        for(k = 0; k < topk; ++k){
+            if(indexes[k] == truth) topk_hits += 1;
+        }
+
+        printf("%s, %d, %f, %f, \n", path, truth, pred[0], pred[1]);
+        printf("%d: top 1: %f, top %d: %f\n", n, top1_hits/(n+1), topk, topk_hits/(n+1));
+    }
+}
+
+/*
+ * Multi-scale validation.
+ *
+ * Every image in the "valid" list is resized with resize_max() to each of the
+ * scales 224, 256, 288 and 320; the network is resized to match and predicts
+ * twice per scale, once before and once after flip_image(). All predictions
+ * are summed and ranked with top_k(); running top-1 / top-k accuracy is
+ * printed per image.
+ *
+ * Note that hierarchy_predictions() is applied only to the first prediction
+ * of each scale, exactly as before.
+ *
+ * The ground-truth class of an image is the first label that occurs as a
+ * substring of its path (-1 when none does).
+ */
+void validate_classifier_multi(char *datacfg, char *cfg, char *weights)
+{
+    network *net = load_network(cfg, weights, 0);
+    set_batch_network(net, 1);
+    srand(time(0));
+
+    list *options = read_data_cfg(datacfg);
+
+    char *label_list = option_find_str(options, "labels", "data/labels.list");
+    char *valid_list = option_find_str(options, "valid", "data/train.list");
+    int classes = option_find_int(options, "classes", 2);
+    int topk = option_find_int(options, "top", 1);
+
+    char **labels = get_labels(label_list);
+    list *plist = get_paths(valid_list);
+    int scales[] = {224, 256, 288, 320};
+    int nscales = sizeof(scales)/sizeof(scales[0]);
+
+    char **paths = (char **)list_to_array(plist);
+    int total = plist->size;
+    free_list(plist);
+
+    float top1_hits = 0;
+    float topk_hits = 0;
+    int *indexes = calloc(topk, sizeof(int));
+
+    int n;
+    for(n = 0; n < total; ++n){
+        char *path = paths[n];
+
+        int truth = -1;
+        int c = 0;
+        while(c < classes && truth < 0){
+            if(strstr(path, labels[c])) truth = c;
+            ++c;
+        }
+
+        float *sum = calloc(classes, sizeof(float));
+        image im = load_image_color(path, 0, 0);
+        int s;
+        for(s = 0; s < nscales; ++s){
+            image r = resize_max(im, scales[s]);
+            resize_network(net, r.w, r.h);
+
+            float *p = network_predict(net, r.data);
+            if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1 , 1);
+            axpy_cpu(classes, 1, p, 1, sum, 1);
+
+            flip_image(r);
+            p = network_predict(net, r.data);
+            axpy_cpu(classes, 1, p, 1, sum, 1);
+
+            /* r may share its buffer with im; only free it when distinct. */
+            if(r.data != im.data) free_image(r);
+        }
+        free_image(im);
+        top_k(sum, classes, topk, indexes);
+        free(sum);
+
+        if(indexes[0] == truth) top1_hits += 1;
+        int k;
+        for(k = 0; k < topk; ++k){
+            if(indexes[k] == truth) topk_hits += 1;
+        }
+
+        printf("%d: top 1: %f, top %d: %f\n", n, top1_hits/(n+1), topk, topk_hits/(n+1));
+    }
+}
+
+/*
+ * Debug helper: classify one image (or images typed on stdin) and dump the
+ * internals of one layer.
+ *
+ * datacfg:    data config; reads "names" (falling back to "labels") and "top".
+ * cfgfile:    network config passed to load_network().
+ * weightfile: weights passed to load_network().
+ * filename:   image to classify; when NULL, paths are read from stdin until
+ *             EOF.
+ * layer_num:  index into net->layers of the layer to dump; it is not range
+ *             checked here, so the caller must pass a valid index.
+ *
+ * The image is resized with resize_min(…, 256), cropped to 224x224 around the
+ * centre and normalised with hard-coded per-channel mean / std values before
+ * prediction.
+ *
+ * Fixes over the previous version:
+ *  - the path is copied with snprintf(), which always NUL-terminates
+ *    (strncpy left the buffer unterminated for paths of 256+ characters);
+ *  - the loaded and resized images are released every iteration instead of
+ *    leaking, using the same shared-buffer guard the file uses elsewhere.
+ */
+void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int layer_num)
+{
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
+    srand(2222222);
+
+    list *options = read_data_cfg(datacfg);
+
+    char *name_list = option_find_str(options, "names", 0);
+    if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list");
+    int top = option_find_int(options, "top", 1);
+
+    int i = 0;
+    char **names = get_labels(name_list);
+    clock_t time;
+    int *indexes = calloc(top, sizeof(int));
+    char buff[256];
+    char *input = buff;
+    while(1){
+        if(filename){
+            snprintf(buff, sizeof(buff), "%s", filename);
+        }else{
+            printf("Enter Image Path: ");
+            fflush(stdout);
+            input = fgets(input, 256, stdin);
+            if(!input) return;
+            strtok(input, "\n");
+        }
+        image orig = load_image_color(input, 0, 0);
+        image r = resize_min(orig, 256);
+        image im = crop_image(r, (r.w - 224 - 1)/2 + 1, (r.h - 224 - 1)/2 + 1, 224, 224);
+        float mean[] = {0.48263312050943, 0.45230225481413, 0.40099074308742};
+        float std[] = {0.22590347483426, 0.22120921437787, 0.22103996251583};
+        float var[3];
+        var[0] = std[0]*std[0];
+        var[1] = std[1]*std[1];
+        var[2] = std[2]*std[2];
+
+        normalize_cpu(im.data, mean, var, 1, 3, im.w*im.h);
+
+        float *X = im.data;
+        time=clock();
+        float *predictions = network_predict(net, X);
+
+        layer l = net->layers[layer_num];
+        for(i = 0; i < l.c; ++i){
+            if(l.rolling_mean) printf("%f %f %f\n", l.rolling_mean[i], l.rolling_variance[i], l.scales[i]);
+        }
+#ifdef GPU
+        cuda_pull_array(l.output_gpu, l.output, l.outputs);
+#endif
+        for(i = 0; i < l.outputs; ++i){
+            printf("%f\n", l.output[i]);
+        }
+
+        top_predictions(net, top, indexes);
+        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
+        for(i = 0; i < top; ++i){
+            int index = indexes[i];
+            printf("%s: %f\n", names[index], predictions[index]);
+        }
+        free_image(im);
+        /* r may share its buffer with orig; only free it when distinct. */
+        if(r.data != orig.data) free_image(r);
+        free_image(orig);
+        if (filename) break;
+    }
+}
+
+/*
+ * Classify one image (or images typed on stdin) and print the top results.
+ *
+ * datacfg:    data config; reads "names" (falling back to "labels") and,
+ *             when top is 0, "top".
+ * cfgfile:    network config passed to load_network().
+ * weightfile: weights passed to load_network().
+ * filename:   image to classify; when NULL, paths are read from stdin until
+ *             EOF.
+ * top:        number of results to print; 0 means "take it from datacfg".
+ *
+ * Each image is letterboxed to the network input size. Results are printed
+ * to stdout as "<percent>%: <name>"; the timing line goes to stderr.
+ *
+ * The path is copied with snprintf(), which always NUL-terminates; the
+ * previous strncpy() left the buffer unterminated for paths of 256 or more
+ * characters.
+ */
+void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top)
+{
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
+    srand(2222222);
+
+    list *options = read_data_cfg(datacfg);
+
+    char *name_list = option_find_str(options, "names", 0);
+    if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list");
+    if(top == 0) top = option_find_int(options, "top", 1);
+
+    int i = 0;
+    char **names = get_labels(name_list);
+    clock_t time;
+    int *indexes = calloc(top, sizeof(int));
+    char buff[256];
+    char *input = buff;
+    while(1){
+        if(filename){
+            snprintf(buff, sizeof(buff), "%s", filename);
+        }else{
+            printf("Enter Image Path: ");
+            fflush(stdout);
+            input = fgets(input, 256, stdin);
+            if(!input) return;
+            strtok(input, "\n");
+        }
+        image im = load_image_color(input, 0, 0);
+        image r = letterbox_image(im, net->w, net->h);
+
+        float *X = r.data;
+        time=clock();
+        float *predictions = network_predict(net, X);
+        if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1);
+        top_k(predictions, net->outputs, top, indexes);
+        fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time));
+        for(i = 0; i < top; ++i){
+            int index = indexes[i];
+            printf("%5.2f%%: %s\n", predictions[index]*100, names[index]);
+        }
+        /* r may share its buffer with im; only free it when distinct. */
+        if(r.data != im.data) free_image(r);
+        free_image(im);
+        if (filename) break;
+    }
+}
+
+
+/*
+ * Print the best label for every image in the "test" list.
+ *
+ * datacfg:    data config; reads "names", "test" and "classes".
+ * filename:   network config passed to load_network().
+ * weightfile: weights passed to load_network().
+ *
+ * Each image is resized with resize_min(), cropped around its centre to the
+ * network input size and predicted once; the label at the index returned by
+ * max_index() over the first `classes` outputs is printed on its own line.
+ */
+void label_classifier(char *datacfg, char *filename, char *weightfile)
+{
+    network *net = load_network(filename, weightfile, 0);
+    set_batch_network(net, 1);
+    srand(time(0));
+
+    list *options = read_data_cfg(datacfg);
+
+    char *label_list = option_find_str(options, "names", "data/labels.list");
+    char *test_list = option_find_str(options, "test", "data/train.list");
+    int classes = option_find_int(options, "classes", 2);
+
+    char **labels = get_labels(label_list);
+    list *plist = get_paths(test_list);
+
+    char **paths = (char **)list_to_array(plist);
+    int total = plist->size;
+    free_list(plist);
+
+    int n = 0;
+    while(n < total){
+        image im = load_image_color(paths[n], 0, 0);
+        image resized = resize_min(im, net->w);
+        image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h);
+        float *pred = network_predict(net, crop.data);
+
+        /* resized may share its buffer with im; only free it when distinct. */
+        if(resized.data != im.data) free_image(resized);
+        free_image(im);
+        free_image(crop);
+
+        int best = max_index(pred, classes);
+        printf("%s\n", labels[best]);
+        ++n;
+    }
+}
+
+/*
+ * Write tab-separated top-k class indices for every image in the "test" list.
+ *
+ * datacfg:    data config; reads "test" and "top".
+ * cfgfile:    network config passed to load_network().
+ * weightfile: weights passed to load_network().
+ *
+ * For each image one line is printed to stdout: the path followed by `top`
+ * tab-prefixed indices from top_k(). Timing and progress go to stderr.
+ */
+void csv_classifier(char *datacfg, char *cfgfile, char *weightfile)
+{
+    network *net = load_network(cfgfile, weightfile, 0);
+    srand(time(0));
+
+    list *options = read_data_cfg(datacfg);
+
+    char *test_list = option_find_str(options, "test", "data/test.list");
+    int top = option_find_int(options, "top", 1);
+
+    list *plist = get_paths(test_list);
+
+    char **paths = (char **)list_to_array(plist);
+    int total = plist->size;
+    free_list(plist);
+    int *indexes = calloc(top, sizeof(int));
+
+    int n = 0;
+    while(n < total){
+        double started = what_time_is_it_now();
+        char *path = paths[n];
+
+        image im = load_image_color(path, 0, 0);
+        image boxed = letterbox_image(im, net->w, net->h);
+        float *predictions = network_predict(net, boxed.data);
+        if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1);
+        top_k(predictions, net->outputs, top, indexes);
+
+        printf("%s", path);
+        int k;
+        for(k = 0; k < top; ++k){
+            printf("\t%d", indexes[k]);
+        }
+        printf("\n");
+
+        free_image(im);
+        free_image(boxed);
+
+        ++n;
+        fprintf(stderr, "%lf seconds, %d images, %d total\n", what_time_is_it_now() - started, n, total);
+    }
+}
+
+void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_layer) /*
+ * Run the network over the image list named by the "test" key of datacfg
+ * (default "data/test.list") and print, for every processed image, its path
+ * followed by one tab-prefixed value per column of the prediction matrix.
+ * Progress and timing lines go to stderr.
+ *
+ * datacfg:      data config; reads "test" and "classes".
+ * cfgfile:      network config passed to load_network().
+ * weightfile:   weights passed to load_network().
+ * target_layer: currently has no effect (the only code using it is commented
+ *               out).
+ *
+ * Images are loaded net->batch at a time on a background thread while the
+ * previously loaded batch is being predicted.
+ *
+ * NOTE(review): the loop starts at curr = net->batch and stops as soon as
+ * curr >= m, so the batch whose load was started last is never joined or
+ * printed, and nothing is printed at all when m <= net->batch. Confirm
+ * whether that is intended.
+ */
+{
+    int curr = 0;
+    network *net = load_network(cfgfile, weightfile, 0);
+    srand(time(0));
+
+    list *options = read_data_cfg(datacfg);
+
+    char *test_list = option_find_str(options, "test", "data/test.list");
+    int classes = option_find_int(options, "classes", 2);
+
+    list *plist = get_paths(test_list);
+
+    char **paths = (char **)list_to_array(plist);
+    int m = plist->size;
+    free_list(plist);
+
+    clock_t time;
+
+    data val, buffer;
+
+    load_args args = {0};
+    args.w = net->w;
+    args.h = net->h;
+    args.paths = paths;
+    args.classes = classes;
+    args.n = net->batch;
+    args.m = 0;
+    args.labels = 0;
+    args.d = &buffer; /* the loader thread writes its result here */
+    args.type = OLD_CLASSIFICATION_DATA;
+
+    pthread_t load_thread = load_data_in_thread(args); /* start loading the first batch */
+    for(curr = net->batch; curr < m; curr += net->batch){ /* curr is one past the batch consumed below */
+        time=clock();
+
+        pthread_join(load_thread, 0);
+        val = buffer;
+
+        if(curr < m){ /* NOTE(review): same as the loop condition, so always true here */
+            args.paths = paths + curr;
+            if (curr + net->batch > m) args.n = m - curr; /* shorten the final, partial batch */
+            load_thread = load_data_in_thread(args);
+        }
+        fprintf(stderr, "Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time));
+
+        time=clock();
+        matrix pred = network_predict_data(net, val);
+
+        int i, j;
+        if (target_layer >= 0){
+            //layer l = net->layers[target_layer];
+        }
+
+        for(i = 0; i < pred.rows; ++i){
+            printf("%s", paths[curr-net->batch+i]); /* path of row i of the batch just predicted */
+            for(j = 0; j < pred.cols; ++j){
+                printf("\t%g", pred.vals[i][j]);
+            }
+            printf("\n");
+        }
+
+        free_matrix(pred);
+
+        fprintf(stderr, "%lf seconds, %d images, %d total\n", sec(clock()-time), val.X.rows, curr);
+        free_data(val);
+    }
+}
+
+/*
+ * Print the raw class scores for every image listed in listfile.
+ *
+ * datacfg:    data config; reads "classes".
+ * filename:   network config passed to load_network().
+ * weightfile: weights passed to load_network().
+ * listfile:   file with one image path per entry, read with get_paths().
+ *
+ * Each image is resized with resize_min(), cropped around its centre to the
+ * network input size and predicted once. One line per image is printed: the
+ * path followed by `classes` tab-prefixed scores.
+ */
+void file_output_classifier(char *datacfg, char *filename, char *weightfile, char *listfile)
+{
+    network *net = load_network(filename, weightfile, 0);
+    set_batch_network(net, 1);
+    srand(time(0));
+
+    list *options = read_data_cfg(datacfg);
+
+    int classes = option_find_int(options, "classes", 2);
+
+    list *plist = get_paths(listfile);
+
+    char **paths = (char **)list_to_array(plist);
+    int total = plist->size;
+    free_list(plist);
+
+    int n = 0;
+    while(n < total){
+        image im = load_image_color(paths[n], 0, 0);
+        image resized = resize_min(im, net->w);
+        image crop = crop_image(resized, (resized.w - net->w)/2, (resized.h - net->h)/2, net->w, net->h);
+
+        float *pred = network_predict(net, crop.data);
+        if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 0, 1);
+
+        /* resized may share its buffer with im; only free it when distinct. */
+        if(resized.data != im.data) free_image(resized);
+        free_image(im);
+        free_image(crop);
+
+        printf("%s", paths[n]);
+        int c;
+        for(c = 0; c < classes; ++c){
+            printf("\t%g", pred[c]);
+        }
+        printf("\n");
+        ++n;
+    }
+}
+
+
+void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) /*
+ * Video demo that turns classifier outputs into a smoothed "threat" level and
+ * draws it as a vertical gauge on each frame. The whole body is compiled only
+ * when OPENCV is defined; otherwise the function does nothing.
+ *
+ * datacfg:    data config; reads "top" and "names".
+ * cfgfile:    network config passed to load_network().
+ * weightfile: weights passed to load_network().
+ * cam_index:  forwarded to open_video_stream().
+ * filename:   forwarded to open_video_stream().
+ *
+ * Per frame: the frame is resized to the network input, predicted, the
+ * threat level is updated, the gauge and two threshold markers (at 0.97 and
+ * 0.57) are drawn, the top predictions are printed to the terminal and the
+ * frame is shown. The loop ends when a frame has no data.
+ */
+{
+#ifdef OPENCV
+    float threat = 0; /* smoothed threat level carried across frames */
+    float roll = .2; /* weight of the newest frame in the smoothing below */
+
+    printf("Classifier Demo\n");
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
+    list *options = read_data_cfg(datacfg);
+
+    srand(2222222);
+    void * cap = open_video_stream(filename, cam_index, 0,0,0);
+
+    int top = option_find_int(options, "top", 1);
+
+    char *name_list = option_find_str(options, "names", 0);
+    char **names = get_labels(name_list);
+
+    int *indexes = calloc(top, sizeof(int));
+
+    if(!cap) error("Couldn't connect to webcam.\n");
+    //cvNamedWindow("Threat", CV_WINDOW_NORMAL); 
+    //cvResizeWindow("Threat", 512, 512);
+    float fps = 0;
+    int i;
+
+    int count = 0;
+
+    while(1){
+        ++count;
+        struct timeval tval_before, tval_after, tval_result;
+        gettimeofday(&tval_before, NULL);
+
+        image in = get_image_from_stream(cap);
+        if(!in.data) break; /* no frame data: stop the demo */
+        image in_s = resize_image(in, net->w, net->h);
+
+        image out = in; /* struct copy: drawing on out uses the same data pointer as in */
+        int x1 = out.w / 20;
+        int y1 = out.h / 20;
+        int x2 = 2*x1;
+        int y2 = out.h - out.h/20;
+
+        int border = .01*out.h;
+        int h = y2 - y1 - 2*border; /* inner height of the gauge box */
+        int w = x2 - x1 - 2*border; /* inner width of the gauge box */
+
+        float *predictions = network_predict(net, in_s.data);
+        float curr_threat = 0;
+        if(1){ /* hard-wired: the else branch below is never taken */
+            curr_threat = predictions[0] * 0 + 
+                predictions[1] * .6 + 
+                predictions[2];
+        } else {
+            curr_threat = predictions[218] +
+                predictions[539] + 
+                predictions[540] + 
+                predictions[368] + 
+                predictions[369] + 
+                predictions[370];
+        }
+        threat = roll * curr_threat + (1-roll) * threat; /* exponential smoothing of the per-frame value */
+
+        draw_box_width(out, x2 + border, y1 + .02*h, x2 + .5 * w, y1 + .02*h + border, border, 0,0,0);
+        if(threat > .97) { /* upper marker, drawn with color (1,0,0) when exceeded */
+            draw_box_width(out,  x2 + .5 * w + border,
+                    y1 + .02*h - 2*border, 
+                    x2 + .5 * w + 6*border, 
+                    y1 + .02*h + 3*border, 3*border, 1,0,0);
+        }
+        draw_box_width(out,  x2 + .5 * w + border,
+                y1 + .02*h - 2*border, 
+                x2 + .5 * w + 6*border, 
+                y1 + .02*h + 3*border, .5*border, 0,0,0);
+        draw_box_width(out, x2 + border, y1 + .42*h, x2 + .5 * w, y1 + .42*h + border, border, 0,0,0);
+        if(threat > .57) { /* lower marker, drawn with color (1,1,0) when exceeded */
+            draw_box_width(out,  x2 + .5 * w + border,
+                    y1 + .42*h - 2*border, 
+                    x2 + .5 * w + 6*border, 
+                    y1 + .42*h + 3*border, 3*border, 1,1,0);
+        }
+        draw_box_width(out,  x2 + .5 * w + border,
+                y1 + .42*h - 2*border, 
+                x2 + .5 * w + 6*border, 
+                y1 + .42*h + 3*border, .5*border, 0,0,0);
+
+        draw_box_width(out, x1, y1, x2, y2, border, 0,0,0);
+        for(i = 0; i < threat * h ; ++i){ /* fill the gauge one row at a time, from the bottom up */
+            float ratio = (float) i / h;
+            float r = (ratio < .5) ? (2*(ratio)) : 1;
+            float g = (ratio < .5) ? 1 : 1 - 2*(ratio - .5);
+            draw_box_width(out, x1 + border, y2 - border - i, x2 - border, y2 - border - i, 1, r, g, 0);
+        }
+        top_predictions(net, top, indexes);
+        char buff[256];
+        sprintf(buff, "/home/pjreddie/tmp/threat_%06d", count); /* only consumed by the commented-out save_image() below */
+        //save_image(out, buff);
+
+        printf("\033[2J");
+        printf("\033[1;1H");
+        printf("\nFPS:%.0f\n",fps);
+
+        for(i = 0; i < top; ++i){
+            int index = indexes[i];
+            printf("%.1f%%: %s\n", predictions[index]*100, names[index]);
+        }
+
+        if(1){
+            show_image(out, "Threat", 10);
+        }
+        free_image(in_s);
+        free_image(in);
+
+        gettimeofday(&tval_after, NULL);
+        timersub(&tval_after, &tval_before, &tval_result);
+        float curr = 1000000.f/((long int)tval_result.tv_usec); /* NOTE(review): uses tv_usec only; tv_sec is ignored */
+        fps = .9*fps + .1*curr;
+    }
+#endif
+}
+
+
+/*
+ * Video demo that reports "Threat Detected!" whenever any of a fixed set of
+ * class indices scores above 0.01, and lists the names of those classes.
+ * The whole body is compiled only when OPENCV is defined; otherwise the
+ * function does nothing.
+ *
+ * datacfg:    data config; reads "top" and "names".
+ * cfgfile:    network config passed to load_network().
+ * weightfile: weights passed to load_network().
+ * cam_index:  forwarded to open_video_stream().
+ * filename:   forwarded to open_video_stream().
+ *
+ * Fixes over the previous version:
+ *  - the loop now stops when a frame has no data, matching
+ *    threat_classifier(); before, an exhausted stream was still resized and
+ *    predicted;
+ *  - the watched-class count is computed once as an int, removing the
+ *    signed/unsigned comparison in the loop conditions.
+ */
+void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
+{
+#ifdef OPENCV
+    int bad_cats[] = {218, 539, 540, 1213, 1501, 1742, 1911, 2415, 4348, 19223, 368, 369, 370, 1133, 1200, 1306, 2122, 2301, 2537, 2823, 3179, 3596, 3639, 4489, 5107, 5140, 5289, 6240, 6631, 6762, 7048, 7171, 7969, 7984, 7989, 8824, 8927, 9915, 10270, 10448, 13401, 15205, 18358, 18894, 18895, 19249, 19697};
+    int n_bad = (int)(sizeof(bad_cats)/sizeof(bad_cats[0]));
+
+    printf("Classifier Demo\n");
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
+    list *options = read_data_cfg(datacfg);
+
+    srand(2222222);
+    void * cap = open_video_stream(filename, cam_index, 0,0,0);
+
+    int top = option_find_int(options, "top", 1);
+
+    char *name_list = option_find_str(options, "names", 0);
+    char **names = get_labels(name_list);
+
+    int *indexes = calloc(top, sizeof(int));
+
+    if(!cap) error("Couldn't connect to webcam.\n");
+    float fps = 0;
+    int i;
+
+    while(1){
+        struct timeval tval_before, tval_after, tval_result;
+        gettimeofday(&tval_before, NULL);
+
+        image in = get_image_from_stream(cap);
+        /* No frame data: stop instead of processing an empty image. */
+        if(!in.data) break;
+        image in_s = resize_image(in, net->w, net->h);
+
+        float *predictions = network_predict(net, in_s.data);
+        top_predictions(net, top, indexes);
+
+        printf("\033[2J");
+        printf("\033[1;1H");
+
+        int threat = 0;
+        for(i = 0; i < n_bad; ++i){
+            int index = bad_cats[i];
+            if(predictions[index] > .01){
+                printf("Threat Detected!\n");
+                threat = 1;
+                break;
+            }
+        }
+        if(!threat) printf("Scanning...\n");
+        for(i = 0; i < n_bad; ++i){
+            int index = bad_cats[i];
+            if(predictions[index] > .01){
+                printf("%s\n", names[index]);
+            }
+        }
+
+        show_image(in, "Threat Detection", 10);
+        free_image(in_s);
+        free_image(in);
+
+        gettimeofday(&tval_after, NULL);
+        timersub(&tval_after, &tval_before, &tval_result);
+        float curr = 1000000.f/((long int)tval_result.tv_usec);
+        fps = .9*fps + .1*curr;
+    }
+#endif
+}
+
+/*
+ * demo_classifier: live classification demo with on-frame labels.
+ *
+ * datacfg    - data config; supplies "top" (default 1), "labels" and "names"
+ *              ("names" falls back to the "labels" value).
+ * cfgfile    - network configuration; its base name is the window title.
+ * weightfile - network weights.
+ * cam_index  - camera index handed to open_video_stream().
+ * filename   - video file handed to open_video_stream().
+ *
+ * Each frame is letterboxed to the network size and classified; the top
+ * predictions are printed to stdout and drawn onto the frame, which is then
+ * shown. The frame loop has no exit condition. Without OPENCV the function
+ * body is empty.
+ */
+void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
+{
+#ifdef OPENCV
+    char *base = basecfg(cfgfile);
+    image **alphabet = load_alphabet();
+    printf("Classifier Demo\n");
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
+    list *options = read_data_cfg(datacfg);
+
+    srand(2222222);
+
+    int w = 1280;
+    int h = 720;
+    void * cap = open_video_stream(filename, cam_index, w, h, 0);
+
+    int top = option_find_int(options, "top", 1);
+
+    char *label_list = option_find_str(options, "labels", 0);
+    char *name_list = option_find_str(options, "names", label_list);
+    char **names = get_labels(name_list);
+
+    int *indexes = calloc(top, sizeof(int));
+
+    if(!cap) error("Couldn't connect to webcam.\n");
+    float fps = 0;
+    int i;
+
+    while(1){
+        struct timeval tval_before, tval_after, tval_result;
+        gettimeofday(&tval_before, NULL);
+
+        image in = get_image_from_stream(cap);
+        image in_s = letterbox_image(in, net->w, net->h);
+
+        float *predictions = network_predict(net, in_s.data);
+        if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1);
+        top_predictions(net, top, indexes);
+
+        /* ANSI escapes: clear the screen and move the cursor to the top-left. */
+        printf("\033[2J");
+        printf("\033[1;1H");
+        printf("\nFPS:%.0f\n",fps);
+
+        /* Label height is 3% of the frame height; the first label starts three
+           label-heights down and each following one two label-heights lower. */
+        int lh = in.h*.03;
+        int toph = 3*lh;
+
+        float rgb[3] = {1,1,1};
+        for(i = 0; i < top; ++i){
+            printf("%d\n", toph);
+            int index = indexes[i];
+            printf("%.1f%%: %s\n", predictions[index]*100, names[index]);
+
+            char buff[1024];
+            /* Bounded: a class name comes from a user-supplied file and may be long. */
+            snprintf(buff, sizeof(buff), "%3.1f%%: %s\n", predictions[index]*100, names[index]);
+            image label = get_label(alphabet, buff, lh);
+            draw_label(in, toph, lh, label, rgb);
+            toph += 2*lh;
+            free_image(label);
+        }
+
+        show_image(in, base, 10);
+        free_image(in_s);
+        free_image(in);
+
+        gettimeofday(&tval_after, NULL);
+        timersub(&tval_after, &tval_before, &tval_result);
+        /* Use the whole elapsed interval: tv_usec alone wraps every second. */
+        double elapsed = tval_result.tv_sec + tval_result.tv_usec/1000000.;
+        float curr = (elapsed > 0) ? 1./elapsed : 0;
+        fps = .9*fps + .1*curr;
+    }
+#endif
+}
+
+
+/*
+ * run_classifier: command-line dispatcher for the classifier sub-commands.
+ *
+ * Expected layout: argv[2] = sub-command, argv[3] = data config,
+ * argv[4] = network config, then optionally weights (argv[5]), a file name
+ * (argv[6]) and a layer index (argv[7]). Flags read anywhere on the line:
+ * -gpus <list>, -c <camera>, -t <top>, -clear.
+ * An unrecognised sub-command does nothing.
+ */
+void run_classifier(int argc, char **argv)
+{
+    /* Both the data config (argv[3]) and the network config (argv[4]) are
+       mandatory for every sub-command, so five arguments are the minimum. */
+    if(argc < 5){
+        fprintf(stderr, "usage: %s %s [train/test/valid] [data] [cfg] [weights (optional)]\n", argv[0], argv[1]);
+        return;
+    }
+
+    char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
+    int ngpus;
+    int *gpus = read_intlist(gpu_list, &ngpus, gpu_index);
+
+
+    int cam_index = find_int_arg(argc, argv, "-c", 0);
+    int top = find_int_arg(argc, argv, "-t", 0);
+    int clear = find_arg(argc, argv, "-clear");
+    char *data = argv[3];
+    char *cfg = argv[4];
+    char *weights = (argc > 5) ? argv[5] : 0;
+    char *filename = (argc > 6) ? argv[6]: 0;
+    char *layer_s = (argc > 7) ? argv[7]: 0;
+    int layer = layer_s ? atoi(layer_s) : -1;
+    if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename, top);
+    else if(0==strcmp(argv[2], "fout")) file_output_classifier(data, cfg, weights, filename);
+    else if(0==strcmp(argv[2], "try")){
+        /* "try" needs an explicit layer index; atoi() on a missing argument
+           would dereference a null pointer. */
+        if(!layer_s){
+            fprintf(stderr, "%s %s try: missing layer argument\n", argv[0], argv[1]);
+            return;
+        }
+        try_classifier(data, cfg, weights, filename, layer);
+    }
+    else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear);
+    else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename);
+    else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename);
+    else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename);
+    else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer);
+    else if(0==strcmp(argv[2], "csv")) csv_classifier(data, cfg, weights);
+    else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights);
+    else if(0==strcmp(argv[2], "valid")) validate_classifier_single(data, cfg, weights);
+    else if(0==strcmp(argv[2], "validmulti")) validate_classifier_multi(data, cfg, weights);
+    else if(0==strcmp(argv[2], "valid10")) validate_classifier_10(data, cfg, weights);
+    else if(0==strcmp(argv[2], "validcrop")) validate_classifier_crop(data, cfg, weights);
+    else if(0==strcmp(argv[2], "validfull")) validate_classifier_full(data, cfg, weights);
+}
+
+

+ 850 - 0
examples/detector.c

@@ -0,0 +1,850 @@
+#include "darknet.h"
+
+/* Lookup table indexed by class number; print_cocos() writes coco_ids[j] as the
+   "category_id" of class j. The table has 80 entries. */
+static int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90};
+
+
+/*
+ * train_detector: train a detection network, optionally across several GPUs.
+ *
+ * datacfg    - data config; supplies "train" (image list, default
+ *              data/train.list) and "backup" (output directory, default
+ *              /backup/).
+ * cfgfile    - network configuration; its base name prefixes saved weights.
+ * weightfile - initial weights passed to load_network().
+ * gpus/ngpus - device ids and their count; one network is loaded per entry.
+ * clear      - forwarded to load_network().
+ *
+ * Trains until get_current_batch() reaches net->max_batches. While one batch
+ * is trained the next is loaded on a background thread. Checkpoints:
+ *   <backup>/<base>.backup         every 100 batches,
+ *   <backup>/<base>_<n>.weights    every 10000 batches, and every 100 while
+ *                                  the batch number is below 1000,
+ *   <backup>/<base>_final.weights  once training ends.
+ */
+void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
+{
+    list *options = read_data_cfg(datacfg);
+    char *train_images = option_find_str(options, "train", "data/train.list");
+    char *backup_directory = option_find_str(options, "backup", "/backup/");
+
+    srand(time(0));
+    char *base = basecfg(cfgfile);
+    printf("%s\n", base);
+    float avg_loss = -1;
+    /* Array of network pointers, one per GPU. */
+    network **nets = calloc(ngpus, sizeof(network*));
+
+    srand(time(0));
+    int seed = rand();
+    int i;
+    for(i = 0; i < ngpus; ++i){
+        /* Same seed before every load so all replicas initialise identically. */
+        srand(seed);
+#ifdef GPU
+        cuda_set_device(gpus[i]);
+#endif
+        nets[i] = load_network(cfgfile, weightfile, clear);
+        nets[i]->learning_rate *= ngpus;
+    }
+    srand(time(0));
+    network *net = nets[0];
+
+    int imgs = net->batch * net->subdivisions * ngpus;
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
+    data train, buffer;
+
+    layer l = net->layers[net->n - 1];
+
+    int classes = l.classes;
+    float jitter = l.jitter;
+
+    list *plist = get_paths(train_images);
+    char **paths = (char **)list_to_array(plist);
+
+    load_args args = get_base_args(net);
+    args.coords = l.coords;
+    args.paths = paths;
+    args.n = imgs;
+    args.m = plist->size;
+    args.classes = classes;
+    args.jitter = jitter;
+    args.num_boxes = l.max_boxes;
+    args.d = &buffer;
+    args.type = DETECTION_DATA;
+    args.threads = 64;
+
+    pthread_t load_thread = load_data(args);
+    /* Named "tick" rather than "time" so the time() function stays visible. */
+    double tick;
+    int count = 0;
+    while(get_current_batch(net) < net->max_batches){
+        if(l.random && count++%10 == 0){
+            /* Multi-scale training: pick a new square input size, a multiple
+               of 32 between 320 and 608, fixed at 608 for the last 200 batches. */
+            printf("Resizing\n");
+            int dim = (rand() % 10 + 10) * 32;
+            if (get_current_batch(net)+200 > net->max_batches) dim = 608;
+            printf("%d\n", dim);
+            args.w = dim;
+            args.h = dim;
+
+            /* The batch already in flight was loaded at the old size: discard it. */
+            pthread_join(load_thread, 0);
+            train = buffer;
+            free_data(train);
+            load_thread = load_data(args);
+
+            #pragma omp parallel for
+            for(i = 0; i < ngpus; ++i){
+                resize_network(nets[i], dim, dim);
+            }
+            net = nets[0];
+        }
+        tick=what_time_is_it_now();
+        pthread_join(load_thread, 0);
+        train = buffer;
+        /* Start loading the next batch while this one is trained. */
+        load_thread = load_data(args);
+
+        printf("Loaded: %lf seconds\n", what_time_is_it_now()-tick);
+
+        tick=what_time_is_it_now();
+        float loss = 0;
+#ifdef GPU
+        if(ngpus == 1){
+            loss = train_network(net, train);
+        } else {
+            loss = train_networks(nets, ngpus, train, 4);
+        }
+#else
+        loss = train_network(net, train);
+#endif
+        /* Exponential moving average of the loss, seeded with the first value. */
+        if (avg_loss < 0) avg_loss = loss;
+        avg_loss = avg_loss*.9 + loss*.1;
+
+        i = get_current_batch(net);
+        printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), what_time_is_it_now()-tick, i*imgs);
+        if(i%100==0){
+#ifdef GPU
+            if(ngpus != 1) sync_nets(nets, ngpus, 0);
+#endif
+            char buff[256];
+            snprintf(buff, sizeof(buff), "%s/%s.backup", backup_directory, base);
+            save_weights(net, buff);
+        }
+        if(i%10000==0 || (i < 1000 && i%100 == 0)){
+#ifdef GPU
+            if(ngpus != 1) sync_nets(nets, ngpus, 0);
+#endif
+            char buff[256];
+            snprintf(buff, sizeof(buff), "%s/%s_%d.weights", backup_directory, base, i);
+            save_weights(net, buff);
+        }
+        free_data(train);
+    }
+#ifdef GPU
+    if(ngpus != 1) sync_nets(nets, ngpus, 0);
+#endif
+    char buff[256];
+    snprintf(buff, sizeof(buff), "%s/%s_final.weights", backup_directory, base);
+    save_weights(net, buff);
+}
+
+
+/*
+ * get_coco_image_id: extract the numeric image id from an image path.
+ *
+ * Takes the file-name part of the path (everything after the last '/') and
+ * parses the number that follows its last '_'; when the file name has no
+ * '_' the number is parsed from the start of the file name. Parsing uses
+ * atoi(), so a name without leading digits yields 0.
+ *
+ * Examples: "dir/COCO_val2014_000000000042.jpg" -> 42, "000123.jpg" -> 123.
+ */
+static int get_coco_image_id(char *filename)
+{
+    char *start = filename;
+    char *slash = strrchr(filename, '/');
+    if(slash) start = slash + 1;
+    /* Search only the file name so an underscore in a directory component
+       cannot hide it, and never do arithmetic on a NULL search result. */
+    char *underscore = strrchr(start, '_');
+    if(underscore) start = underscore + 1;
+    return atoi(start);
+}
+
+/*
+ * print_cocos: append one JSON record per non-zero class probability.
+ *
+ * The image id is parsed from image_path. Each box is converted from
+ * centre/size form to a corner plus width/height, after clamping the corners
+ * to [0, w] x [0, h]. Every record ends with ",\n".
+ */
+static void print_cocos(FILE *fp, char *image_path, detection *dets, int num_boxes, int classes, int w, int h)
+{
+    int image_id = get_coco_image_id(image_path);
+    int n;
+    for(n = 0; n < num_boxes; ++n){
+        const detection *d = &dets[n];
+        int c;
+
+        float left   = d->bbox.x - d->bbox.w/2.;
+        float right  = d->bbox.x + d->bbox.w/2.;
+        float top    = d->bbox.y - d->bbox.h/2.;
+        float bottom = d->bbox.y + d->bbox.h/2.;
+
+        /* Clamp the box to the image. */
+        left   = (left < 0)   ? 0 : left;
+        top    = (top < 0)    ? 0 : top;
+        right  = (right > w)  ? w : right;
+        bottom = (bottom > h) ? h : bottom;
+
+        float bx = left;
+        float by = top;
+        float bw = right - left;
+        float bh = bottom - top;
+
+        for(c = 0; c < classes; ++c){
+            if (!d->prob[c]) continue;
+            fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[c], bx, by, bw, bh, d->prob[c]);
+        }
+    }
+}
+
+/*
+ * print_detector_detections: write one text line per non-zero class
+ * probability to that class's own stream, fps[class].
+ *
+ * Line format: "<id> <prob> <xmin> <ymin> <xmax> <ymax>". Corners are shifted
+ * by +1 and then clamped so xmin, ymin >= 1 and xmax <= w, ymax <= h.
+ */
+void print_detector_detections(FILE **fps, char *id, detection *dets, int total, int classes, int w, int h)
+{
+    int n;
+    for(n = 0; n < total; ++n){
+        const detection *d = &dets[n];
+        int c;
+
+        float left   = d->bbox.x - d->bbox.w/2. + 1;
+        float right  = d->bbox.x + d->bbox.w/2. + 1;
+        float top    = d->bbox.y - d->bbox.h/2. + 1;
+        float bottom = d->bbox.y + d->bbox.h/2. + 1;
+
+        left   = (left < 1)   ? 1 : left;
+        top    = (top < 1)    ? 1 : top;
+        right  = (right > w)  ? w : right;
+        bottom = (bottom > h) ? h : bottom;
+
+        for(c = 0; c < classes; ++c){
+            if (!d->prob[c]) continue;
+            fprintf(fps[c], "%s %f %f %f %f %f\n", id, d->prob[c],
+                    left, top, right, bottom);
+        }
+    }
+}
+
+/*
+ * print_imagenet_detections: write one text line per non-zero class
+ * probability to fp.
+ *
+ * Line format: "<id> <class+1> <prob> <xmin> <ymin> <xmax> <ymax>", with the
+ * corners clamped to [0, w] x [0, h]. Class numbers are written 1-based.
+ */
+void print_imagenet_detections(FILE *fp, int id, detection *dets, int total, int classes, int w, int h)
+{
+    int n;
+    for(n = 0; n < total; ++n){
+        const detection *d = &dets[n];
+        int cls;
+
+        float left   = d->bbox.x - d->bbox.w/2.;
+        float right  = d->bbox.x + d->bbox.w/2.;
+        float top    = d->bbox.y - d->bbox.h/2.;
+        float bottom = d->bbox.y + d->bbox.h/2.;
+
+        left   = (left < 0)   ? 0 : left;
+        top    = (top < 0)    ? 0 : top;
+        right  = (right > w)  ? w : right;
+        bottom = (bottom > h) ? h : bottom;
+
+        for(cls = 0; cls < classes; ++cls){
+            if (!d->prob[cls]) continue;
+            fprintf(fp, "%d %d %f %f %f %f %f\n", id, cls+1, d->prob[cls],
+                    left, top, right, bottom);
+        }
+    }
+}
+
+/*
+ * validate_detector_flip: run the detector over a validation list, feeding
+ * each image together with its flipped copy as a batch of two.
+ *
+ * datacfg    - data config; supplies "valid", "names", "results" (output
+ *              directory), "map" and "eval" ("coco", "imagenet", otherwise
+ *              one text file per class).
+ * cfgfile    - network configuration.
+ * weightfile - network weights.
+ * outfile    - output base name; a per-format default is used when NULL.
+ *
+ * Images are loaded by 4 background threads: while one group of up to four
+ * images is evaluated, the next group is already loading. Returns early,
+ * after printing a message, when an output file cannot be opened.
+ */
+void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char *outfile)
+{
+    int j;
+    list *options = read_data_cfg(datacfg);
+    char *valid_images = option_find_str(options, "valid", "data/train.list");
+    char *name_list = option_find_str(options, "names", "data/names.list");
+    char *prefix = option_find_str(options, "results", "results");
+    char **names = get_labels(name_list);
+    char *mapf = option_find_str(options, "map", 0);
+    int *map = 0;
+    if (mapf) map = read_map(mapf);
+
+    network *net = load_network(cfgfile, weightfile, 0);
+    /* Batch of two: the image and its flipped copy. */
+    set_batch_network(net, 2);
+    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
+    srand(time(0));
+
+    list *plist = get_paths(valid_images);
+    char **paths = (char **)list_to_array(plist);
+
+    layer l = net->layers[net->n-1];
+    int classes = l.classes;
+
+    char buff[1024];
+    char *type = option_find_str(options, "eval", "voc");
+    FILE *fp = 0;
+    FILE **fps = 0;
+    int coco = 0;
+    int imagenet = 0;
+    if(0==strcmp(type, "coco")){
+        if(!outfile) outfile = "coco_results";
+        snprintf(buff, sizeof(buff), "%s/%s.json", prefix, outfile);
+        fp = fopen(buff, "w");
+        if(!fp){
+            fprintf(stderr, "Couldn't open file: %s\n", buff);
+            return;
+        }
+        fprintf(fp, "[\n");
+        coco = 1;
+    } else if(0==strcmp(type, "imagenet")){
+        if(!outfile) outfile = "imagenet-detection";
+        snprintf(buff, sizeof(buff), "%s/%s.txt", prefix, outfile);
+        fp = fopen(buff, "w");
+        if(!fp){
+            fprintf(stderr, "Couldn't open file: %s\n", buff);
+            return;
+        }
+        imagenet = 1;
+        classes = 200;
+    } else {
+        if(!outfile) outfile = "comp4_det_test_";
+        fps = calloc(classes, sizeof(FILE *));
+        for(j = 0; j < classes; ++j){
+            snprintf(buff, sizeof(buff), "%s/%s%s.txt", prefix, outfile, names[j]);
+            fps[j] = fopen(buff, "w");
+            if(!fps[j]){
+                fprintf(stderr, "Couldn't open file: %s\n", buff);
+                return;
+            }
+        }
+    }
+
+    int m = plist->size;
+    int i=0;
+    int t;
+
+    float thresh = .005;
+    float nms = .45;
+
+    int nthreads = 4;
+    image *val = calloc(nthreads, sizeof(image));
+    image *val_resized = calloc(nthreads, sizeof(image));
+    image *buf = calloc(nthreads, sizeof(image));
+    image *buf_resized = calloc(nthreads, sizeof(image));
+    pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
+
+    image input = make_image(net->w, net->h, net->c*2);
+
+    load_args args = {0};
+    args.w = net->w;
+    args.h = net->h;
+    args.type = LETTERBOX_DATA;
+
+    /* Prime the pipeline; never index past the list when it has fewer than
+       nthreads entries. */
+    for(t = 0; t < nthreads && t < m; ++t){
+        args.path = paths[i+t];
+        args.im = &buf[t];
+        args.resized = &buf_resized[t];
+        thr[t] = load_data_in_thread(args);
+    }
+    double start = what_time_is_it_now();
+    for(i = nthreads; i < m+nthreads; i += nthreads){
+        fprintf(stderr, "%d\n", i);
+        /* Collect the group that finished loading... */
+        for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
+            pthread_join(thr[t], 0);
+            val[t] = buf[t];
+            val_resized[t] = buf_resized[t];
+        }
+        /* ...start loading the next one... */
+        for(t = 0; t < nthreads && i+t < m; ++t){
+            args.path = paths[i+t];
+            args.im = &buf[t];
+            args.resized = &buf_resized[t];
+            thr[t] = load_data_in_thread(args);
+        }
+        /* ...and evaluate the collected group meanwhile. */
+        for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
+            char *path = paths[i+t-nthreads];
+            char *id = basecfg(path);
+            /* First half of the input: the image; second half: its flipped copy. */
+            copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data, 1);
+            flip_image(val_resized[t]);
+            copy_cpu(net->w*net->h*net->c, val_resized[t].data, 1, input.data + net->w*net->h*net->c, 1);
+
+            network_predict(net, input.data);
+            int w = val[t].w;
+            int h = val[t].h;
+            int num = 0;
+            detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &num);
+            if (nms) do_nms_sort(dets, num, classes, nms);
+            if (coco){
+                print_cocos(fp, path, dets, num, classes, w, h);
+            } else if (imagenet){
+                print_imagenet_detections(fp, i+t-nthreads+1, dets, num, classes, w, h);
+            } else {
+                print_detector_detections(fps, id, dets, num, classes, w, h);
+            }
+            free_detections(dets, num);
+            free(id);
+            free_image(val[t]);
+            free_image(val_resized[t]);
+        }
+    }
+    if(fps){
+        for(j = 0; j < classes; ++j){
+            fclose(fps[j]);
+        }
+    }
+    if(coco){
+        /* Step back over the trailing ",\n" of the last record and close the array. */
+        fseek(fp, -2, SEEK_CUR);
+        fprintf(fp, "\n]\n");
+    }
+    /* Covers both the coco and the imagenet output file. */
+    if(fp) fclose(fp);
+    fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start);
+}
+
+
+/*
+ * validate_detector: run the detector over a validation list and write the
+ * detections to result files.
+ *
+ * datacfg    - data config; supplies "valid", "names", "results" (output
+ *              directory), "map" and "eval" ("coco", "imagenet", otherwise
+ *              one text file per class).
+ * cfgfile    - network configuration.
+ * weightfile - network weights.
+ * outfile    - output base name; a per-format default is used when NULL.
+ *
+ * Images are loaded by 4 background threads: while one group of up to four
+ * images is evaluated, the next group is already loading. Returns early,
+ * after printing a message, when an output file cannot be opened.
+ */
+void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *outfile)
+{
+    int j;
+    list *options = read_data_cfg(datacfg);
+    char *valid_images = option_find_str(options, "valid", "data/train.list");
+    char *name_list = option_find_str(options, "names", "data/names.list");
+    char *prefix = option_find_str(options, "results", "results");
+    char **names = get_labels(name_list);
+    char *mapf = option_find_str(options, "map", 0);
+    int *map = 0;
+    if (mapf) map = read_map(mapf);
+
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
+    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
+    srand(time(0));
+
+    list *plist = get_paths(valid_images);
+    char **paths = (char **)list_to_array(plist);
+
+    layer l = net->layers[net->n-1];
+    int classes = l.classes;
+
+    char buff[1024];
+    char *type = option_find_str(options, "eval", "voc");
+    FILE *fp = 0;
+    FILE **fps = 0;
+    int coco = 0;
+    int imagenet = 0;
+    if(0==strcmp(type, "coco")){
+        if(!outfile) outfile = "coco_results";
+        snprintf(buff, sizeof(buff), "%s/%s.json", prefix, outfile);
+        fp = fopen(buff, "w");
+        if(!fp){
+            fprintf(stderr, "Couldn't open file: %s\n", buff);
+            return;
+        }
+        fprintf(fp, "[\n");
+        coco = 1;
+    } else if(0==strcmp(type, "imagenet")){
+        if(!outfile) outfile = "imagenet-detection";
+        snprintf(buff, sizeof(buff), "%s/%s.txt", prefix, outfile);
+        fp = fopen(buff, "w");
+        if(!fp){
+            fprintf(stderr, "Couldn't open file: %s\n", buff);
+            return;
+        }
+        imagenet = 1;
+        classes = 200;
+    } else {
+        if(!outfile) outfile = "comp4_det_test_";
+        fps = calloc(classes, sizeof(FILE *));
+        for(j = 0; j < classes; ++j){
+            snprintf(buff, sizeof(buff), "%s/%s%s.txt", prefix, outfile, names[j]);
+            fps[j] = fopen(buff, "w");
+            if(!fps[j]){
+                fprintf(stderr, "Couldn't open file: %s\n", buff);
+                return;
+            }
+        }
+    }
+
+
+    int m = plist->size;
+    int i=0;
+    int t;
+
+    float thresh = .005;
+    float nms = .45;
+
+    int nthreads = 4;
+    image *val = calloc(nthreads, sizeof(image));
+    image *val_resized = calloc(nthreads, sizeof(image));
+    image *buf = calloc(nthreads, sizeof(image));
+    image *buf_resized = calloc(nthreads, sizeof(image));
+    pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
+
+    load_args args = {0};
+    args.w = net->w;
+    args.h = net->h;
+    args.type = LETTERBOX_DATA;
+
+    /* Prime the pipeline; never index past the list when it has fewer than
+       nthreads entries. */
+    for(t = 0; t < nthreads && t < m; ++t){
+        args.path = paths[i+t];
+        args.im = &buf[t];
+        args.resized = &buf_resized[t];
+        thr[t] = load_data_in_thread(args);
+    }
+    double start = what_time_is_it_now();
+    for(i = nthreads; i < m+nthreads; i += nthreads){
+        fprintf(stderr, "%d\n", i);
+        /* Collect the group that finished loading... */
+        for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
+            pthread_join(thr[t], 0);
+            val[t] = buf[t];
+            val_resized[t] = buf_resized[t];
+        }
+        /* ...start loading the next one... */
+        for(t = 0; t < nthreads && i+t < m; ++t){
+            args.path = paths[i+t];
+            args.im = &buf[t];
+            args.resized = &buf_resized[t];
+            thr[t] = load_data_in_thread(args);
+        }
+        /* ...and evaluate the collected group meanwhile. */
+        for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
+            char *path = paths[i+t-nthreads];
+            char *id = basecfg(path);
+            float *X = val_resized[t].data;
+            network_predict(net, X);
+            int w = val[t].w;
+            int h = val[t].h;
+            int nboxes = 0;
+            detection *dets = get_network_boxes(net, w, h, thresh, .5, map, 0, &nboxes);
+            if (nms) do_nms_sort(dets, nboxes, classes, nms);
+            if (coco){
+                print_cocos(fp, path, dets, nboxes, classes, w, h);
+            } else if (imagenet){
+                print_imagenet_detections(fp, i+t-nthreads+1, dets, nboxes, classes, w, h);
+            } else {
+                print_detector_detections(fps, id, dets, nboxes, classes, w, h);
+            }
+            free_detections(dets, nboxes);
+            free(id);
+            free_image(val[t]);
+            free_image(val_resized[t]);
+        }
+    }
+    if(fps){
+        for(j = 0; j < classes; ++j){
+            fclose(fps[j]);
+        }
+    }
+    if(coco){
+        /* Step back over the trailing ",\n" of the last record and close the array. */
+        fseek(fp, -2, SEEK_CUR);
+        fprintf(fp, "\n]\n");
+    }
+    /* Covers both the coco and the imagenet output file. */
+    if(fp) fclose(fp);
+    fprintf(stderr, "Total Detection Time: %f Seconds\n", what_time_is_it_now() - start);
+}
+
+/*
+ * validate_detector_recall: report proposal count, average best IOU and
+ * recall over the images listed in data/coco_val_5k.list.
+ *
+ * cfgfile    - network configuration.
+ * weightfile - network weights.
+ *
+ * For each image the label file path is derived from the image path
+ * (images -> labels, JPEGImages -> labels, .jpg/.JPEG -> .txt). A ground-truth
+ * box counts as recalled when the best IOU among detections whose objectness
+ * exceeds thresh is above iou_thresh. One running summary line per image is
+ * written to stderr.
+ */
+void validate_detector_recall(char *cfgfile, char *weightfile)
+{
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
+    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
+    srand(time(0));
+
+    list *plist = get_paths("data/coco_val_5k.list");
+    char **paths = (char **)list_to_array(plist);
+
+    int j, k;
+
+    int m = plist->size;
+    int i=0;
+
+    float thresh = .001;
+    float iou_thresh = .5;
+    float nms = .4;
+
+    int total = 0;
+    int correct = 0;
+    int proposals = 0;
+    float avg_iou = 0;
+
+    for(i = 0; i < m; ++i){
+        char *path = paths[i];
+        image orig = load_image_color(path, 0, 0);
+        image sized = resize_image(orig, net->w, net->h);
+        char *id = basecfg(path);
+        network_predict(net, sized.data);
+        int nboxes = 0;
+        detection *dets = get_network_boxes(net, sized.w, sized.h, thresh, .5, 0, 1, &nboxes);
+        if (nms) do_nms_obj(dets, nboxes, 1, nms);
+
+        char labelpath[4096];
+        find_replace(path, "images", "labels", labelpath);
+        find_replace(labelpath, "JPEGImages", "labels", labelpath);
+        find_replace(labelpath, ".jpg", ".txt", labelpath);
+        find_replace(labelpath, ".JPEG", ".txt", labelpath);
+
+        int num_labels = 0;
+        box_label *truth = read_boxes(labelpath, &num_labels);
+        for(k = 0; k < nboxes; ++k){
+            if(dets[k].objectness > thresh){
+                ++proposals;
+            }
+        }
+        for (j = 0; j < num_labels; ++j) {
+            ++total;
+            box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h};
+            float best_iou = 0;
+            /* Only nboxes detections were returned; iterate over exactly those. */
+            for(k = 0; k < nboxes; ++k){
+                float iou = box_iou(dets[k].bbox, t);
+                if(dets[k].objectness > thresh && iou > best_iou){
+                    best_iou = iou;
+                }
+            }
+            avg_iou += best_iou;
+            if(best_iou > iou_thresh){
+                ++correct;
+            }
+        }
+
+        fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total);
+        free_detections(dets, nboxes);
+        free(id);
+        free_image(orig);
+        free_image(sized);
+    }
+}
+
+
+/*
+ * test_detector: run detection on single images and save the annotated result.
+ *
+ * datacfg     - data config; supplies the "names" list.
+ * cfgfile     - network configuration.
+ * weightfile  - network weights.
+ * filename    - image to process; when NULL, paths are read from stdin one
+ *               per line until input ends.
+ * thresh      - detection threshold.
+ * hier_thresh - threshold forwarded to get_network_boxes().
+ * outfile     - output image name; when NULL the result is saved as
+ *               "predictions" and, with OPENCV, also shown in a window.
+ * fullscreen  - not used in this function.
+ */
+void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen)
+{
+    list *options = read_data_cfg(datacfg);
+    char *name_list = option_find_str(options, "names", "data/names.list");
+    char **names = get_labels(name_list);
+
+    image **alphabet = load_alphabet();
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
+    srand(2222222);
+    double time;
+    char buff[256];
+    char *input = buff;
+    float nms=.45;
+    while(1){
+        if(filename){
+            /* snprintf always NUL-terminates, unlike strncpy on a long path. */
+            snprintf(buff, sizeof(buff), "%s", filename);
+        } else {
+            printf("Enter Image Path: ");
+            fflush(stdout);
+            input = fgets(input, 256, stdin);
+            if(!input) return;
+            /* Cut at the newline; this also handles a line that is only "\n". */
+            input[strcspn(input, "\n")] = 0;
+        }
+        image im = load_image_color(input,0,0);
+        image sized = letterbox_image(im, net->w, net->h);
+        layer l = net->layers[net->n-1];
+
+
+        float *X = sized.data;
+        time=what_time_is_it_now();
+        network_predict(net, X);
+        printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-time);
+        int nboxes = 0;
+        detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes);
+        if (nms) do_nms_sort(dets, nboxes, l.classes, nms);
+        draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes);
+        free_detections(dets, nboxes);
+        if(outfile){
+            save_image(im, outfile);
+        }
+        else{
+            save_image(im, "predictions");
+#ifdef OPENCV
+            make_window("predictions", 512, 512, 0);
+            show_image(im, "predictions", 0);
+#endif
+        }
+
+        free_image(im);
+        free_image(sized);
+        /* A file given on the command line is processed exactly once. */
+        if (filename) break;
+    }
+}
+
+/*
+void censor_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip)
+{
+#ifdef OPENCV
+    char *base = basecfg(cfgfile);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
+
+    srand(2222222);
+    CvCapture * cap;
+
+    int w = 1280;
+    int h = 720;
+
+    if(filename){
+        cap = cvCaptureFromFile(filename);
+    }else{
+        cap = cvCaptureFromCAM(cam_index);
+    }
+
+    if(w){
+        cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w);
+    }
+    if(h){
+        cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h);
+    }
+
+    if(!cap) error("Couldn't connect to webcam.\n");
+    cvNamedWindow(base, CV_WINDOW_NORMAL); 
+    cvResizeWindow(base, 512, 512);
+    float fps = 0;
+    int i;
+    float nms = .45;
+
+    while(1){
+        image in = get_image_from_stream(cap);
+        //image in_s = resize_image(in, net->w, net->h);
+        image in_s = letterbox_image(in, net->w, net->h);
+        layer l = net->layers[net->n-1];
+
+        float *X = in_s.data;
+        network_predict(net, X);
+        int nboxes = 0;
+        detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 0, &nboxes);
+        //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
+        if (nms) do_nms_sort(dets, nboxes, l.classes, nms);
+
+        for(i = 0; i < nboxes; ++i){
+            if(dets[i].prob[class] > thresh){
+                box b = dets[i].bbox;
+                int left  = b.x-b.w/2.;
+                int top   = b.y-b.h/2.;
+                censor_image(in, left, top, b.w, b.h);
+            }
+        }
+        show_image(in, base);
+        cvWaitKey(10);
+        free_detections(dets, nboxes);
+
+
+        free_image(in_s);
+        free_image(in);
+
+
+        float curr = 0;
+        fps = .9*fps + .1*curr;
+        for(i = 0; i < skip; ++i){
+            image in = get_image_from_stream(cap);
+            free_image(in);
+        }
+    }
+    #endif
+}
+
+void extract_detector(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int class, float thresh, int skip)
+{
+#ifdef OPENCV
+    char *base = basecfg(cfgfile);
+    network *net = load_network(cfgfile, weightfile, 0);
+    set_batch_network(net, 1);
+
+    srand(2222222);
+    CvCapture * cap;
+
+    int w = 1280;
+    int h = 720;
+
+    if(filename){
+        cap = cvCaptureFromFile(filename);
+    }else{
+        cap = cvCaptureFromCAM(cam_index);
+    }
+
+    if(w){
+        cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w);
+    }
+    if(h){
+        cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h);
+    }
+
+    if(!cap) error("Couldn't connect to webcam.\n");
+    cvNamedWindow(base, CV_WINDOW_NORMAL); 
+    cvResizeWindow(base, 512, 512);
+    float fps = 0;
+    int i;
+    int count = 0;
+    float nms = .45;
+
+    while(1){
+        image in = get_image_from_stream(cap);
+        //image in_s = resize_image(in, net->w, net->h);
+        image in_s = letterbox_image(in, net->w, net->h);
+        layer l = net->layers[net->n-1];
+
+        show_image(in, base);
+
+        int nboxes = 0;
+        float *X = in_s.data;
+        network_predict(net, X);
+        detection *dets = get_network_boxes(net, in.w, in.h, thresh, 0, 0, 1, &nboxes);
+        //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
+        if (nms) do_nms_sort(dets, nboxes, l.classes, nms);
+
+        for(i = 0; i < nboxes; ++i){
+            if(dets[i].prob[class] > thresh){
+                box b = dets[i].bbox;
+                int size = b.w*in.w > b.h*in.h ? b.w*in.w : b.h*in.h;
+                int dx  = b.x*in.w-size/2.;
+                int dy  = b.y*in.h-size/2.;
+                image bim = crop_image(in, dx, dy, size, size);
+                char buff[2048];
+                sprintf(buff, "results/extract/%07d", count);
+                ++count;
+                save_image(bim, buff);
+                free_image(bim);
+            }
+        }
+        free_detections(dets, nboxes);
+
+
+        free_image(in_s);
+        free_image(in);
+
+
+        float curr = 0;
+        fps = .9*fps + .1*curr;
+        for(i = 0; i < skip; ++i){
+            image in = get_image_from_stream(cap);
+            free_image(in);
+        }
+    }
+    #endif
+}
+*/
+
+/*
+void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets)
+{
+    network_predict_image(net, im);
+    layer l = net->layers[net->n-1];
+    int nboxes = num_boxes(net);
+    fill_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 0, dets);
+    if (nms) do_nms_sort(dets, nboxes, l.classes, nms);
+}
+*/
+
+/*
+ * run_detector: command-line dispatcher for the detector sub-commands.
+ *
+ * Expected layout: argv[2] = sub-command (test, train, valid, valid2, recall,
+ * demo), argv[3] = data config, argv[4] = network config, then optionally
+ * weights (argv[5]) and a file name (argv[6]).
+ * Flags read anywhere on the line: -prefix, -thresh (default .5), -hier
+ * (default .5), -c, -s, -avg (default 3), -gpus <comma-separated ids>, -out,
+ * -clear, -fullscreen, -w, -h, -fps.
+ * An unrecognised sub-command does nothing.
+ */
+void run_detector(int argc, char **argv)
+{
+    char *prefix = find_char_arg(argc, argv, "-prefix", 0);
+    float thresh = find_float_arg(argc, argv, "-thresh", .5);
+    float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
+    int cam_index = find_int_arg(argc, argv, "-c", 0);
+    int frame_skip = find_int_arg(argc, argv, "-s", 0);
+    int avg = find_int_arg(argc, argv, "-avg", 3);
+    /* Both the data config (argv[3]) and the network config (argv[4]) are
+       mandatory, so five arguments are the minimum. */
+    if(argc < 5){
+        fprintf(stderr, "usage: %s %s [train/test/valid] [data] [cfg] [weights (optional)]\n", argv[0], argv[1]);
+        return;
+    }
+    char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
+    char *outfile = find_char_arg(argc, argv, "-out", 0);
+    int *gpus = 0;
+    int gpu = 0;
+    int ngpus = 0;
+    if(gpu_list){
+        printf("%s\n", gpu_list);
+        int len = strlen(gpu_list);
+        /* One more entry than there are commas. */
+        ngpus = 1;
+        int i;
+        for(i = 0; i < len; ++i){
+            if (gpu_list[i] == ',') ++ngpus;
+        }
+        gpus = calloc(ngpus, sizeof(int));
+        for(i = 0; i < ngpus; ++i){
+            gpus[i] = atoi(gpu_list);
+            /* After the last entry there is no comma; do not advance from NULL. */
+            char *comma = strchr(gpu_list, ',');
+            if(comma) gpu_list = comma + 1;
+        }
+    } else {
+        gpu = gpu_index;
+        gpus = &gpu;
+        ngpus = 1;
+    }
+
+    int clear = find_arg(argc, argv, "-clear");
+    int fullscreen = find_arg(argc, argv, "-fullscreen");
+    int width = find_int_arg(argc, argv, "-w", 0);
+    int height = find_int_arg(argc, argv, "-h", 0);
+    int fps = find_int_arg(argc, argv, "-fps", 0);
+
+    char *datacfg = argv[3];
+    char *cfg = argv[4];
+    char *weights = (argc > 5) ? argv[5] : 0;
+    char *filename = (argc > 6) ? argv[6]: 0;
+    if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen);
+    else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
+    else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
+    else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile);
+    else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
+    else if(0==strcmp(argv[2], "demo")) {
+        list *options = read_data_cfg(datacfg);
+        int classes = option_find_int(options, "classes", 20);
+        char *name_list = option_find_str(options, "names", "data/names.list");
+        char **names = get_labels(name_list);
+        demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen);
+    }
+}

+ 806 - 0
include/darknet.h

@@ -0,0 +1,806 @@
+#ifndef DARKNET_API
+#define DARKNET_API
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <time.h>
+
+#ifdef GPU
+    #define BLOCK 512
+
+    #include "cuda_runtime.h"
+    #include "curand.h"
+    #include "cublas_v2.h"
+
+    #ifdef CUDNN
+    #include "cudnn.h"
+    #endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SECRET_NUM -1234
+extern int gpu_index;
+
+typedef struct{
+    int classes;
+    char **names;
+} metadata;
+
+metadata get_metadata(char *file);
+
+typedef struct{
+    int *leaf;
+    int n;
+    int *parent;
+    int *child;
+    int *group;
+    char **name;
+
+    int groups;
+    int *group_size;
+    int *group_offset;
+} tree;
+tree *read_tree(char *filename);
+
+typedef enum{
+    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU
+} ACTIVATION;
+
+typedef enum{
+    PNG, BMP, TGA, JPG
+} IMTYPE;
+
+typedef enum{
+    MULT, ADD, SUB, DIV
+} BINARY_ACTIVATION;
+
+typedef enum {
+    CONVOLUTIONAL,
+    DECONVOLUTIONAL,
+    CONNECTED,
+    MAXPOOL,
+    SOFTMAX,
+    DETECTION,
+    DROPOUT,
+    CROP,
+    ROUTE,
+    COST,
+    NORMALIZATION,
+    AVGPOOL,
+    LOCAL,
+    SHORTCUT,
+    ACTIVE,
+    RNN,
+    GRU,
+    LSTM,
+    CRNN,
+    BATCHNORM,
+    NETWORK,
+    XNOR,
+    REGION,
+    YOLO,
+    ISEG,
+    REORG,
+    UPSAMPLE,
+    LOGXENT,
+    L2NORM,
+    BLANK
+} LAYER_TYPE;
+
+typedef enum{
+    SSE, MASKED, L1, SEG, SMOOTH,WGAN
+} COST_TYPE;
+
+typedef struct{
+    int batch;
+    float learning_rate;
+    float momentum;
+    float decay;
+    int adam;
+    float B1;
+    float B2;
+    float eps;
+    int t;
+} update_args;
+
+struct network;
+typedef struct network network;
+
+struct layer;
+typedef struct layer layer;
+
+struct layer{
+    LAYER_TYPE type;
+    ACTIVATION activation;
+    COST_TYPE cost_type;
+    void (*forward)   (struct layer, struct network);
+    void (*backward)  (struct layer, struct network);
+    void (*update)    (struct layer, update_args);
+    void (*forward_gpu)   (struct layer, struct network);
+    void (*backward_gpu)  (struct layer, struct network);
+    void (*update_gpu)    (struct layer, update_args);
+    int batch_normalize;
+    int shortcut;
+    int batch;
+    int forced;
+    int flipped;
+    int inputs;
+    int outputs;
+    int nweights;
+    int nbiases;
+    int extra;
+    int truths;
+    int h,w,c;
+    int out_h, out_w, out_c;
+    int n;
+    int max_boxes;
+    int groups;
+    int size;
+    int side;
+    int stride;
+    int reverse;
+    int flatten;
+    int spatial;
+    int pad;
+    int sqrt;
+    int flip;
+    int index;
+    int binary;
+    int xnor;
+    int steps;
+    int hidden;
+    int truth;
+    float smooth;
+    float dot;
+    float angle;
+    float jitter;
+    float saturation;
+    float exposure;
+    float shift;
+    float ratio;
+    float learning_rate_scale;
+    float clip;
+    int noloss;
+    int softmax;
+    int classes;
+    int coords;
+    int background;
+    int rescore;
+    int objectness;
+    int joint;
+    int noadjust;
+    int reorg;
+    int log;
+    int tanh;
+    int *mask;
+    int total;
+
+    float alpha;
+    float beta;
+    float kappa;
+
+    float coord_scale;
+    float object_scale;
+    float noobject_scale;
+    float mask_scale;
+    float class_scale;
+    int bias_match;
+    int random;
+    float ignore_thresh;
+    float truth_thresh;
+    float thresh;
+    float focus;
+    int classfix;
+    int absolute;
+
+    int onlyforward;
+    int stopbackward;
+    int dontload;
+    int dontsave;
+    int dontloadscales;
+    int numload;
+
+    float temperature;
+    float probability;
+    float scale;
+
+    char  * cweights;
+    int   * indexes;
+    int   * input_layers;
+    int   * input_sizes;
+    int   * map;
+    int   * counts;
+    float ** sums;
+    float * rand;
+    float * cost;
+    float * state;
+    float * prev_state;
+    float * forgot_state;
+    float * forgot_delta;
+    float * state_delta;
+    float * combine_cpu;
+    float * combine_delta_cpu;
+
+    float * concat;
+    float * concat_delta;
+
+    float * binary_weights;
+
+    float * biases;
+    float * bias_updates;
+
+    float * scales;
+    float * scale_updates;
+
+    float * weights;
+    float * weight_updates;
+
+    float * delta;
+    float * output;
+    float * loss;
+    float * squared;
+    float * norms;
+
+    float * spatial_mean;
+    float * mean;
+    float * variance;
+
+    float * mean_delta;
+    float * variance_delta;
+
+    float * rolling_mean;
+    float * rolling_variance;
+
+    float * x;
+    float * x_norm;
+
+    float * m;
+    float * v;
+    
+    float * bias_m;
+    float * bias_v;
+    float * scale_m;
+    float * scale_v;
+
+
+    float *z_cpu;
+    float *r_cpu;
+    float *h_cpu;
+    float * prev_state_cpu;
+
+    float *temp_cpu;
+    float *temp2_cpu;
+    float *temp3_cpu;
+
+    float *dh_cpu;
+    float *hh_cpu;
+    float *prev_cell_cpu;
+    float *cell_cpu;
+    float *f_cpu;
+    float *i_cpu;
+    float *g_cpu;
+    float *o_cpu;
+    float *c_cpu;
+    float *dc_cpu; 
+
+    float * binary_input;
+
+    struct layer *input_layer;
+    struct layer *self_layer;
+    struct layer *output_layer;
+
+    struct layer *reset_layer;
+    struct layer *update_layer;
+    struct layer *state_layer;
+
+    struct layer *input_gate_layer;
+    struct layer *state_gate_layer;
+    struct layer *input_save_layer;
+    struct layer *state_save_layer;
+    struct layer *input_state_layer;
+    struct layer *state_state_layer;
+
+    struct layer *input_z_layer;
+    struct layer *state_z_layer;
+
+    struct layer *input_r_layer;
+    struct layer *state_r_layer;
+
+    struct layer *input_h_layer;
+    struct layer *state_h_layer;
+	
+    struct layer *wz;
+    struct layer *uz;
+    struct layer *wr;
+    struct layer *ur;
+    struct layer *wh;
+    struct layer *uh;
+    struct layer *uo;
+    struct layer *wo;
+    struct layer *uf;
+    struct layer *wf;
+    struct layer *ui;
+    struct layer *wi;
+    struct layer *ug;
+    struct layer *wg;
+
+    tree *softmax_tree;
+
+    size_t workspace_size;
+
+#ifdef GPU
+    int *indexes_gpu;
+
+    float *z_gpu;
+    float *r_gpu;
+    float *h_gpu;
+
+    float *temp_gpu;
+    float *temp2_gpu;
+    float *temp3_gpu;
+
+    float *dh_gpu;
+    float *hh_gpu;
+    float *prev_cell_gpu;
+    float *cell_gpu;
+    float *f_gpu;
+    float *i_gpu;
+    float *g_gpu;
+    float *o_gpu;
+    float *c_gpu;
+    float *dc_gpu; 
+
+    float *m_gpu;
+    float *v_gpu;
+    float *bias_m_gpu;
+    float *scale_m_gpu;
+    float *bias_v_gpu;
+    float *scale_v_gpu;
+
+    float * combine_gpu;
+    float * combine_delta_gpu;
+
+    float * prev_state_gpu;
+    float * forgot_state_gpu;
+    float * forgot_delta_gpu;
+    float * state_gpu;
+    float * state_delta_gpu;
+    float * gate_gpu;
+    float * gate_delta_gpu;
+    float * save_gpu;
+    float * save_delta_gpu;
+    float * concat_gpu;
+    float * concat_delta_gpu;
+
+    float * binary_input_gpu;
+    float * binary_weights_gpu;
+
+    float * mean_gpu;
+    float * variance_gpu;
+
+    float * rolling_mean_gpu;
+    float * rolling_variance_gpu;
+
+    float * variance_delta_gpu;
+    float * mean_delta_gpu;
+
+    float * x_gpu;
+    float * x_norm_gpu;
+    float * weights_gpu;
+    float * weight_updates_gpu;
+    float * weight_change_gpu;
+
+    float * biases_gpu;
+    float * bias_updates_gpu;
+    float * bias_change_gpu;
+
+    float * scales_gpu;
+    float * scale_updates_gpu;
+    float * scale_change_gpu;
+
+    float * output_gpu;
+    float * loss_gpu;
+    float * delta_gpu;
+    float * rand_gpu;
+    float * squared_gpu;
+    float * norms_gpu;
+#ifdef CUDNN
+    cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc;
+    cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc;
+    cudnnTensorDescriptor_t normTensorDesc;
+    cudnnFilterDescriptor_t weightDesc;
+    cudnnFilterDescriptor_t dweightDesc;
+    cudnnConvolutionDescriptor_t convDesc;
+    cudnnConvolutionFwdAlgo_t fw_algo;
+    cudnnConvolutionBwdDataAlgo_t bd_algo;
+    cudnnConvolutionBwdFilterAlgo_t bf_algo;
+#endif
+#endif
+};
+
+void free_layer(layer);
+
+typedef enum {
+    CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM
+} learning_rate_policy;
+
+typedef struct network{
+    int n;
+    int batch;
+    size_t *seen;
+    int *t;
+    float epoch;
+    int subdivisions;
+    layer *layers;
+    float *output;
+    learning_rate_policy policy;
+
+    float learning_rate;
+    float momentum;
+    float decay;
+    float gamma;
+    float scale;
+    float power;
+    int time_steps;
+    int step;
+    int max_batches;
+    float *scales;
+    int   *steps;
+    int num_steps;
+    int burn_in;
+
+    int adam;
+    float B1;
+    float B2;
+    float eps;
+
+    int inputs;
+    int outputs;
+    int truths;
+    int notruth;
+    int h, w, c;
+    int max_crop;
+    int min_crop;
+    float max_ratio;
+    float min_ratio;
+    int center;
+    float angle;
+    float aspect;
+    float exposure;
+    float saturation;
+    float hue;
+    int random;
+
+    int gpu_index;
+    tree *hierarchy;
+
+    float *input;
+    float *truth;
+    float *delta;
+    float *workspace;
+    int train;
+    int index;
+    float *cost;
+    float clip;
+
+#ifdef GPU
+    float *input_gpu;
+    float *truth_gpu;
+    float *delta_gpu;
+    float *output_gpu;
+#endif
+
+} network;
+
+typedef struct {
+    int w;
+    int h;
+    float scale;
+    float rad;
+    float dx;
+    float dy;
+    float aspect;
+} augment_args;
+
+typedef struct { /* Float image passed by value; data is shared between copies of the struct. */
+    int w; /* width; also the row stride used when indexing data */
+    int h; /* height */
+    int c; /* number of channel planes */
+    float *data; /* planar buffer: src/image.c reads pixel (x, y, k) as data[k*h*w + y*w + x] */
+} image;
+
+typedef struct{ /* Rectangle described by a centre point and full extents. */
+    float x, y, w, h; /* the drawing code in src/image.c computes edges as x -/+ w/2 and y -/+ h/2, scaled by image width/height */
+} box;
+
+typedef struct detection{ /* One candidate detection. */
+    box bbox; /* location; draw_detections scales it by the image size before drawing */
+    int classes; /* presumably the number of entries in prob -- TODO confirm */
+    float *prob; /* per-class scores; draw_detections compares prob[j] with its threshold for each class j */
+    float *mask; /* optional; when non-NULL draw_detections reads it as a 14x14 single-channel float image */
+    float objectness; /* NOTE(review): not referenced in the visible code -- confirm meaning */
+    int sort_class; /* NOTE(review): likely consumed by the NMS sort routines -- confirm */
+} detection;
+
+typedef struct matrix{
+    int rows, cols;
+    float **vals;
+} matrix;
+
+
+typedef struct{
+    int w, h;
+    matrix X;
+    matrix y;
+    int shallow;
+    int *num_boxes;
+    box **boxes;
+} data;
+
+typedef enum {
+    CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA, INSTANCE_DATA, ISEG_DATA
+} data_type;
+
+typedef struct load_args{
+    int threads;
+    char **paths;
+    char *path;
+    int n;
+    int m;
+    char **labels;
+    int h;
+    int w;
+    int out_w;
+    int out_h;
+    int nh;
+    int nw;
+    int num_boxes;
+    int min, max, size;
+    int classes;
+    int background;
+    int scale;
+    int center;
+    int coords;
+    float jitter;
+    float angle;
+    float aspect;
+    float saturation;
+    float exposure;
+    float hue;
+    data *d;
+    image *im;
+    image *resized;
+    data_type type;
+    tree *hierarchy;
+} load_args;
+
+typedef struct{
+    int id;
+    float x,y,w,h;
+    float left, right, top, bottom;
+} box_label;
+
+
+network *load_network(char *cfg, char *weights, int clear);
+load_args get_base_args(network *net);
+
+void free_data(data d);
+
+typedef struct node{
+    void *val;
+    struct node *next;
+    struct node *prev;
+} node;
+
+typedef struct list{
+    int size;
+    node *front;
+    node *back;
+} list;
+
+pthread_t load_data(load_args args);
+list *read_data_cfg(char *filename);
+list *read_cfg(char *filename);
+unsigned char *read_file(char *filename);
+data resize_data(data orig, int w, int h);
+data *tile_data(data orig, int divs, int size);
+data select_data(data *orig, int *inds);
+
+void forward_network(network *net);
+void backward_network(network *net);
+void update_network(network *net);
+
+
+float dot_cpu(int N, float *X, int INCX, float *Y, int INCY);
+void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);
+void copy_cpu(int N, float *X, int INCX, float *Y, int INCY);
+void scal_cpu(int N, float ALPHA, float *X, int INCX);
+void fill_cpu(int N, float ALPHA, float * X, int INCX);
+void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial);
+void softmax(float *input, int n, float temp, int stride, float *output);
+
+int best_3d_shift_r(image a, image b, int min, int max);
+#ifdef GPU
+void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);
+void fill_gpu(int N, float ALPHA, float * X, int INCX);
+void scal_gpu(int N, float ALPHA, float * X, int INCX);
+void copy_gpu(int N, float * X, int INCX, float * Y, int INCY);
+
+void cuda_set_device(int n);
+void cuda_free(float *x_gpu);
+float *cuda_make_array(float *x, size_t n);
+void cuda_pull_array(float *x_gpu, float *x, size_t n);
+float cuda_mag_array(float *x_gpu, size_t n);
+void cuda_push_array(float *x_gpu, float *x, size_t n);
+
+void forward_network_gpu(network *net);
+void backward_network_gpu(network *net);
+void update_network_gpu(network *net);
+
+float train_networks(network **nets, int n, data d, int interval);
+void sync_nets(network **nets, int n, int interval);
+void harmless_update_network_gpu(network *net);
+#endif
+image get_label(image **characters, char *string, int size);
+void draw_label(image a, int r, int c, image label, const float *rgb);
+void save_image(image im, const char *name);
+void save_image_options(image im, const char *name, IMTYPE f, int quality);
+void get_next_batch(data d, int n, int offset, float *X, float *y);
+void grayscale_image_3c(image im);
+void normalize_image(image p);
+void matrix_to_csv(matrix m);
+float train_network_sgd(network *net, data d, int n);
+void rgbgr_image(image im);
+data copy_data(data d);
+data concat_data(data d1, data d2);
+data load_cifar10_data(char *filename);
+float matrix_topk_accuracy(matrix truth, matrix guess, int k);
+void matrix_add_matrix(matrix from, matrix to);
+void scale_matrix(matrix m, float scale);
+matrix csv_to_matrix(char *filename);
+float *network_accuracies(network *net, data d, int n);
+float train_network_datum(network *net);
+image make_random_image(int w, int h, int c);
+
+void denormalize_connected_layer(layer l);
+void denormalize_convolutional_layer(layer l);
+void statistics_connected_layer(layer l);
+void rescale_weights(layer l, float scale, float trans);
+void rgbgr_weights(layer l);
+image *get_weights(layer l);
+
+void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, int avg, float hier_thresh, int w, int h, int fps, int fullscreen);
+void get_detection_detections(layer l, int w, int h, float thresh, detection *dets);
+
+char *option_find_str(list *l, char *key, char *def);
+int option_find_int(list *l, char *key, int def);
+int option_find_int_quiet(list *l, char *key, int def);
+
+network *parse_network_cfg(char *filename);
+void save_weights(network *net, char *filename);
+void load_weights(network *net, char *filename);
+void save_weights_upto(network *net, char *filename, int cutoff);
+void load_weights_upto(network *net, char *filename, int start, int cutoff);
+
+void zero_objectness(layer l);
+void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets);
+int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets);
+void free_network(network *net);
+void set_batch_network(network *net, int b);
+void set_temp_network(network *net, float t);
+image load_image(char *filename, int w, int h, int c);
+image load_image_color(char *filename, int w, int h);
+image make_image(int w, int h, int c);
+image resize_image(image im, int w, int h);
+void censor_image(image im, int dx, int dy, int w, int h);
+image letterbox_image(image im, int w, int h);
+image crop_image(image im, int dx, int dy, int w, int h);
+image center_crop_image(image im, int w, int h);
+image resize_min(image im, int min);
+image resize_max(image im, int max);
+image threshold_image(image im, float thresh);
+image mask_to_rgb(image mask);
+int resize_network(network *net, int w, int h);
+void free_matrix(matrix m);
+void test_resize(char *filename);
+int show_image(image p, const char *name, int ms);
+image copy_image(image p);
+void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b);
+float get_current_rate(network *net);
+void composite_3d(char *f1, char *f2, char *out, int delta);
+data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h);
+size_t get_current_batch(network *net);
+void constrain_image(image im);
+image get_network_image_layer(network *net, int i);
+layer get_network_output_layer(network *net);
+void top_predictions(network *net, int n, int *index);
+void flip_image(image a);
+image float_to_image(int w, int h, int c, float *data);
+void ghost_image(image source, image dest, int dx, int dy);
+float network_accuracy(network *net, data d);
+void random_distort_image(image im, float hue, float saturation, float exposure);
+void fill_image(image m, float s);
+image grayscale_image(image im);
+void rotate_image_cw(image im, int times);
+double what_time_is_it_now();
+image rotate_image(image m, float rad);
+void visualize_network(network *net);
+float box_iou(box a, box b);
+data load_all_cifar10();
+box_label *read_boxes(char *filename, int *n);
+box float_to_box(float *f, int stride);
+void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes);
+
+matrix network_predict_data(network *net, data test);
+image **load_alphabet();
+image get_network_image(network *net);
+float *network_predict(network *net, float *input);
+
+int network_width(network *net);
+int network_height(network *net);
+float *network_predict_image(network *net, image im);
+void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, detection *dets);
+detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num);
+void free_detections(detection *dets, int n);
+
+void reset_network_state(network *net, int b);
+
+char **get_labels(char *filename);
+void do_nms_obj(detection *dets, int total, int classes, float thresh);
+void do_nms_sort(detection *dets, int total, int classes, float thresh);
+
+matrix make_matrix(int rows, int cols);
+
+#ifdef OPENCV
+void *open_video_stream(const char *f, int c, int w, int h, int fps);
+image get_image_from_stream(void *p);
+void make_window(char *name, int w, int h, int fullscreen);
+#endif
+
+void free_image(image m);
+float train_network(network *net, data d);
+pthread_t load_data_in_thread(load_args args);
+void load_data_blocking(load_args args);
+list *get_paths(char *filename);
+void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride);
+void change_leaves(tree *t, char *leaf_list);
+
+int find_int_arg(int argc, char **argv, char *arg, int def);
+float find_float_arg(int argc, char **argv, char *arg, float def);
+int find_arg(int argc, char* argv[], char *arg);
+char *find_char_arg(int argc, char **argv, char *arg, char *def);
+char *basecfg(char *cfgfile);
+void find_replace(char *str, char *orig, char *rep, char *output);
+void free_ptrs(void **ptrs, int n);
+char *fgetl(FILE *fp);
+void strip(char *s);
+float sec(clock_t clocks);
+void **list_to_array(list *l);
+void top_k(float *a, int n, int k, int *index);
+int *read_map(char *filename);
+void error(const char *s);
+int max_index(float *a, int n);
+int max_int_index(int *a, int n);
+int sample_array(float *a, int n);
+int *random_index_order(int min, int max);
+void free_list(list *l);
+float mse_array(float *a, int n);
+float variance_array(float *a, int n);
+float mag_array(float *a, int n);
+void scale_array(float *a, int n, float s);
+float mean_array(float *a, int n);
+float sum_array(float *a, int n);
+void normalize_array(float *a, int n);
+int *read_intlist(char *s, int *n, int d);
+size_t rand_size_t();
+float rand_normal();
+float rand_uniform(float min, float max);
+
+#ifdef __cplusplus
+}
+#endif
+#endif

+ 1468 - 0
src/image.c

@@ -0,0 +1,1468 @@
+#include "image.h"
+#include "utils.h"
+#include "blas.h"
+#include "cuda.h"
+#include <stdio.h>
+#include <math.h>
+
+#define STB_IMAGE_IMPLEMENTATION
+#include "stb_image.h"
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#include "stb_image_write.h"
+
+int windows = 0;
+
+float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} };
+
+/*
+ * Return one component of a colour taken from the six-entry `colors` ramp.
+ * `c` selects the component column; x/max (scaled to 0..5) selects the
+ * position on the ramp, and the two neighbouring entries are blended
+ * linearly by the fractional part of that position.
+ */
+float get_color(int c, int x, int max)
+{
+    float pos = ((float)x/max)*5;
+    int lo = floor(pos);
+    int hi = ceil(pos);
+    float frac = pos - lo;
+    return (1-frac) * colors[lo][c] + frac*colors[hi][c];
+}
+
+/*
+ * Build a 3-channel image in which every channel of `mask` is tinted with
+ * its own colour from get_color() and accumulated into the result.
+ * The accumulation starts from whatever make_image() returns
+ * (presumably zeroed data -- confirm against make_image).
+ */
+image mask_to_rgb(image mask)
+{
+    image rgb = make_image(mask.w, mask.h, 3);
+    int channels = mask.c;
+    int ch, p;
+    for(ch = 0; ch < channels; ++ch){
+        /* Scramble the channel index so adjacent channels get distinct colours. */
+        int offset = ch*123457 % channels;
+        float red = get_color(2,offset,channels);
+        float green = get_color(1,offset,channels);
+        float blue = get_color(0,offset,channels);
+        for(p = 0; p < rgb.w*rgb.h; ++p){
+            float m = mask.data[ch*rgb.h*rgb.w + p];
+            rgb.data[p + 0*rgb.w*rgb.h] += m*red;
+            rgb.data[p + 1*rgb.w*rgb.h] += m*green;
+            rgb.data[p + 2*rgb.w*rgb.h] += m*blue;
+        }
+    }
+    return rgb;
+}
+
+/* Read pixel (x, y) of channel c; only the upper bounds are asserted. */
+static float get_pixel(image m, int x, int y, int c)
+{
+    assert(x < m.w && y < m.h && c < m.c);
+    int idx = c*m.h*m.w + y*m.w + x;
+    return m.data[idx];
+}
+/*
+ * Bounds-tolerant pixel read: any coordinate or channel outside the image
+ * yields 0 (zero padding, not edge clamping).
+ */
+static float get_pixel_extend(image m, int x, int y, int c)
+{
+    int inside_plane = x >= 0 && x < m.w && y >= 0 && y < m.h;
+    int valid_channel = c >= 0 && c < m.c;
+    if(!inside_plane || !valid_channel) return 0;
+    return get_pixel(m, x, y, c);
+}
+/* Write pixel (x, y) of channel c; writes outside the image are silently ignored. */
+static void set_pixel(image m, int x, int y, int c, float val)
+{
+    int inside = x >= 0 && y >= 0 && c >= 0 && x < m.w && y < m.h && c < m.c;
+    if (inside) m.data[c*m.h*m.w + y*m.w + x] = val;
+}
+/* Add val to pixel (x, y) of channel c; only the upper bounds are asserted. */
+static void add_pixel(image m, int x, int y, int c, float val)
+{
+    assert(x < m.w && y < m.h && c < m.c);
+    int idx = c*m.h*m.w + y*m.w + x;
+    m.data[idx] += val;
+}
+
+/*
+ * Sample channel c at the fractional position (x, y) by blending the four
+ * surrounding pixels. Neighbours outside the image contribute 0 because
+ * they are fetched through get_pixel_extend().
+ */
+static float bilinear_interpolate(image im, float x, float y, int c)
+{
+    int left = (int) floorf(x);
+    int top = (int) floorf(y);
+
+    float fx = x - left;
+    float fy = y - top;
+
+    float p00 = get_pixel_extend(im, left, top, c);
+    float p01 = get_pixel_extend(im, left, top+1, c);
+    float p10 = get_pixel_extend(im, left+1, top, c);
+    float p11 = get_pixel_extend(im, left+1, top+1, c);
+
+    return (1-fy) * (1-fx) * p00 +
+        fy     * (1-fx) * p01 +
+        (1-fy) *   fx   * p10 +
+        fy     *   fx   * p11;
+}
+
+
+/*
+ * Multiply `source` into `dest` at offset (dx, dy): every destination pixel
+ * covered by the source becomes source * dest. Destination reads outside
+ * the image count as 0 and writes outside it are dropped.
+ */
+void composite_image(image source, image dest, int dx, int dy)
+{
+    int col, row, ch;
+    for(ch = 0; ch < source.c; ++ch){
+        for(row = 0; row < source.h; ++row){
+            for(col = 0; col < source.w; ++col){
+                int tx = dx+col;
+                int ty = dy+row;
+                float src = get_pixel(source, col, row, ch);
+                float dst = get_pixel_extend(dest, tx, ty, ch);
+                set_pixel(dest, tx, ty, ch, src * dst);
+            }
+        }
+    }
+}
+
+/*
+ * Return a new image that is `a` surrounded by a frame of `border` pixels
+ * on every side; the frame pixels are set to 1.
+ */
+image border_image(image a, int border)
+{
+    image framed = make_image(a.w + 2*border, a.h + 2*border, a.c);
+    int col, row, ch;
+    for(ch = 0; ch < framed.c; ++ch){
+        for(row = 0; row < framed.h; ++row){
+            int sy = row - border;
+            for(col = 0; col < framed.w; ++col){
+                int sx = col - border;
+                int in_source = sx >= 0 && sx < a.w && sy >= 0 && sy < a.h;
+                float val = in_source ? get_pixel_extend(a, sx, sy, ch) : 1;
+                set_pixel(framed, col, row, ch, val);
+            }
+        }
+    }
+    return framed;
+}
+
+/*
+ * Place `b` to the right of `a` with a horizontal gap of dx pixels (which
+ * may be negative) and return the combined image. The canvas is filled with
+ * 1, `a` is copied in, and `b` is multiplied in via composite_image().
+ * When `a` has zero width the result is just a copy of `b`.
+ */
+image tile_images(image a, image b, int dx)
+{
+    if(a.w == 0) return copy_image(b);
+
+    int out_h = (a.h > b.h) ? a.h : b.h;
+    int out_c = (a.c > b.c) ? a.c : b.c;
+    image canvas = make_image(a.w + b.w + dx, out_h, out_c);
+
+    fill_cpu(canvas.w*canvas.h*canvas.c, 1, canvas.data, 1);
+    embed_image(a, canvas, 0, 0);
+    composite_image(b, canvas, a.w + dx, 0);
+    return canvas;
+}
+
+/*
+ * Render `string` as an image by tiling glyphs from `characters`.
+ *
+ * characters: glyph tables indexed as characters[size_index][character code];
+ *             load_alphabet() allocates 8 tables of 128 entries each.
+ * string:     NUL-terminated text to render.
+ * size:       requested size; divided by 10 and clamped to 0..7 to select
+ *             the glyph table.
+ *
+ * Returns a newly created image (glyph strip plus a border of a quarter of
+ * its height); the caller releases it with free_image().
+ *
+ * Bytes outside 0..127 are skipped: the glyph tables hold 128 entries, and
+ * on platforms where plain char is signed such bytes would otherwise become
+ * negative array indices.
+ */
+image get_label(image **characters, char *string, int size)
+{
+    size = size/10;
+    if(size > 7) size = 7;
+    if(size < 0) size = 0;
+    image label = make_empty_image(0,0,0);
+    for(; *string; ++string){
+        unsigned char code = (unsigned char)*string;
+        if(code >= 128) continue;
+        image l = characters[size][code];
+        image n = tile_images(label, l, -size - 1 + (size+1)/2);
+        free_image(label);
+        label = n;
+    }
+    image b = border_image(label, label.h*.25);
+    free_image(label);
+    return b;
+}
+
+/*
+ * Paint `label` onto `a`, tinting each channel k of the label by rgb[k].
+ * The label is placed above row r when it fits (r - label.h >= 0),
+ * otherwise it starts at row r; its left edge is column c. Pixels that
+ * would fall past the right or bottom edge of `a` are not drawn.
+ */
+void draw_label(image a, int r, int c, image label, const float *rgb)
+{
+    int lw = label.w;
+    int lh = label.h;
+    int row0 = (r - lh >= 0) ? r - lh : r;
+
+    int col, row, ch;
+    for(row = 0; row < lh && row + row0 < a.h; ++row){
+        for(col = 0; col < lw && col + c < a.w; ++col){
+            for(ch = 0; ch < label.c; ++ch){
+                float glyph = get_pixel(label, col, row, ch);
+                set_pixel(a, col+c, row+row0, ch, rgb[ch] * glyph);
+            }
+        }
+    }
+}
+
+/*
+ * Draw a one-pixel rectangle outline with corners (x1, y1) and (x2, y2) in
+ * colour (r, g, b). All four coordinates are clamped into the image first.
+ * The first three channel planes of `a` are written directly.
+ */
+void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b)
+{
+    const float color[3] = {r, g, b};
+    int plane = a.w*a.h;
+    int i, k;
+
+    /* Clamp order matters: lower bound first, then upper bound. */
+    x1 = (x1 < 0) ? 0 : x1;
+    x1 = (x1 >= a.w) ? a.w-1 : x1;
+    x2 = (x2 < 0) ? 0 : x2;
+    x2 = (x2 >= a.w) ? a.w-1 : x2;
+
+    y1 = (y1 < 0) ? 0 : y1;
+    y1 = (y1 >= a.h) ? a.h-1 : y1;
+    y2 = (y2 < 0) ? 0 : y2;
+    y2 = (y2 >= a.h) ? a.h-1 : y2;
+
+    /* Top and bottom edges. */
+    for(i = x1; i <= x2; ++i){
+        for(k = 0; k < 3; ++k){
+            a.data[i + y1*a.w + k*plane] = color[k];
+            a.data[i + y2*a.w + k*plane] = color[k];
+        }
+    }
+    /* Left and right edges. */
+    for(i = y1; i <= y2; ++i){
+        for(k = 0; k < 3; ++k){
+            a.data[x1 + i*a.w + k*plane] = color[k];
+            a.data[x2 + i*a.w + k*plane] = color[k];
+        }
+    }
+}
+
+/*
+ * Draw a rectangle outline w pixels thick by drawing w nested one-pixel
+ * boxes, each inset by one more pixel than the previous.
+ */
+void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b)
+{
+    int inset = 0;
+    while(inset < w){
+        draw_box(a, x1+inset, y1+inset, x2-inset, y2-inset, r, g, b);
+        ++inset;
+    }
+}
+
+/*
+ * Draw `bbox` on `a` as an outline w pixels thick. The box is given as a
+ * centre (x, y) and extents (w, h) which are scaled by the image width and
+ * height to obtain pixel edges.
+ */
+void draw_bbox(image a, box bbox, int w, float r, float g, float b)
+{
+    int left  = (bbox.x-bbox.w/2)*a.w;
+    int right = (bbox.x+bbox.w/2)*a.w;
+    int top   = (bbox.y-bbox.h/2)*a.h;
+    int bot   = (bbox.y+bbox.h/2)*a.h;
+
+    /* Same nested-box loop as draw_box_width(). */
+    draw_box_width(a, left, top, right, bot, w, r, g, b);
+}
+
+/*
+ * Load the glyph images used to render labels.
+ *
+ * Returns an array of 8 tables (one per glyph size index). Each table has
+ * 128 entries indexed by character code; codes 32..126 are loaded from
+ * "data/labels/<code>_<size>.png", and the remaining entries stay
+ * zero-initialised by calloc. Ownership of the tables passes to the caller.
+ */
+image **load_alphabet()
+{
+    int i, j;
+    const int nsize = 8;
+    /* The outer array holds pointers to tables, so size it by the pointer type. */
+    image **alphabets = calloc(nsize, sizeof(*alphabets));
+    for(j = 0; j < nsize; ++j){
+        alphabets[j] = calloc(128, sizeof(*alphabets[j]));
+        for(i = 32; i < 127; ++i){
+            char buff[256];
+            snprintf(buff, sizeof(buff), "data/labels/%d_%d.png", i, j);
+            alphabets[j][i] = load_image_color(buff, 0, 0);
+        }
+    }
+    return alphabets;
+}
+
+/*
+ * Draw every detection that has at least one class score above `thresh`.
+ *
+ * im:       image to draw on (modified in place).
+ * dets:     array of `num` detections.
+ * thresh:   a class j is reported when dets[i].prob[j] > thresh.
+ * names:    class names, indexed by class.
+ * alphabet: glyph tables for get_label(); when NULL no text label is drawn.
+ * classes:  number of classes to scan in each detection.
+ *
+ * For each reported class the name and score are printed to stdout. The box
+ * colour is chosen from the first class that passed the threshold, and when
+ * a label is drawn the box position is printed as "pos=left,top,width,height".
+ * If the detection carries a mask it is resized, thresholded and embedded at
+ * the box origin.
+ *
+ * The label text is built with bounded appends, so a long list of matching
+ * class names is truncated instead of overrunning the buffer.
+ */
+void draw_detections(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes)
+{
+    int i,j;
+
+    for(i = 0; i < num; ++i){
+        char labelstr[4096] = {0};
+        size_t used = 0;
+        int class = -1;
+        for(j = 0; j < classes; ++j){
+            if (dets[i].prob[j] > thresh){
+                const char *sep = (class < 0) ? "" : ", ";
+                if (class < 0) class = j;
+
+                if (used < sizeof(labelstr) - 1){
+                    int n = snprintf(labelstr + used, sizeof(labelstr) - used, "%s%s", sep, names[j]);
+                    if (n > 0) used += (size_t)n;
+                    /* snprintf reports the untruncated length; cap at what fits. */
+                    if (used > sizeof(labelstr) - 1) used = sizeof(labelstr) - 1;
+                }
+
+                printf("%s: %.2f%%\n", names[j], dets[i].prob[j]*100);
+            }
+        }
+        if(class >= 0){
+            int width = im.h * .006;
+
+            int offset = class*123457 % classes;
+            float red = get_color(2,offset,classes);
+            float green = get_color(1,offset,classes);
+            float blue = get_color(0,offset,classes);
+            float rgb[3];
+
+            rgb[0] = red;
+            rgb[1] = green;
+            rgb[2] = blue;
+            box b = dets[i].bbox;
+
+            int left  = (b.x-b.w/2.)*im.w;
+            int right = (b.x+b.w/2.)*im.w;
+            int top   = (b.y-b.h/2.)*im.h;
+            int bot   = (b.y+b.h/2.)*im.h;
+
+            if(left < 0) left = 0;
+            if(right > im.w-1) right = im.w-1;
+            if(top < 0) top = 0;
+            if(bot > im.h-1) bot = im.h-1;
+
+            draw_box_width(im, left, top, right, bot, width, red, green, blue);
+            if (alphabet) {
+                image label = get_label(alphabet, labelstr, (im.h*.03));
+                draw_label(im, top + width, left, label, rgb);
+                printf("pos=%d,%d,%d,%d\n", left, top, right-left, bot-top);
+                free_image(label);
+            }
+            if (dets[i].mask){
+                image mask = float_to_image(14, 14, 1, dets[i].mask);
+                image resized_mask = resize_image(mask, b.w*im.w, b.h*im.h);
+                image tmask = threshold_image(resized_mask, .5);
+                embed_image(tmask, im, left, top);
+                free_image(mask);
+                free_image(resized_mask);
+                free_image(tmask);
+            }
+        }
+    }
+}
+
+/*
+ * Transpose every channel of a square image in place by swapping each
+ * element above the diagonal with its mirror below it.
+ */
+void transpose_image(image im)
+{
+    assert(im.w == im.h);
+    int row, col, ch;
+    for(ch = 0; ch < im.c; ++ch){
+        for(row = 0; row < im.w-1; ++row){
+            for(col = row + 1; col < im.w; ++col){
+                int upper = col + im.w*(row + im.h*ch);
+                int lower = row + im.w*(col + im.h*ch);
+                float held = im.data[upper];
+                im.data[upper] = im.data[lower];
+                im.data[lower] = held;
+            }
+        }
+    }
+}
+
+/*
+ * Rotate a square image in place by quarter turns. `times` is reduced with
+ * (times + 400) % 4, and each pass cycles groups of four elements through
+ * a single temporary.
+ */
+void rotate_image_cw(image im, int times)
+{
+    assert(im.w == im.h);
+    int turns = (times + 400) % 4;
+    int side = im.w;
+    int pass, x, y, c;
+    for(pass = 0; pass < turns; ++pass){
+        for(c = 0; c < im.c; ++c){
+            for(x = 0; x < side/2; ++x){
+                for(y = 0; y < (side-1)/2 + 1; ++y){
+                    /* The four positions that exchange values in this cycle. */
+                    int p0 = y + im.w*(x + im.h*c);
+                    int p1 = side-1-x + im.w*(y + im.h*c);
+                    int p2 = side-1-y + im.w*(side-1-x + im.h*c);
+                    int p3 = x + im.w*(side-1-y + im.h*c);
+
+                    float saved = im.data[p0];
+                    im.data[p0] = im.data[p1];
+                    im.data[p1] = im.data[p2];
+                    im.data[p2] = im.data[p3];
+                    im.data[p3] = saved;
+                }
+            }
+        }
+    }
+}
+
+/* Mirror the image left-to-right in place, row by row in every channel. */
+void flip_image(image a)
+{
+    int ch, row, col;
+    for(ch = 0; ch < a.c; ++ch){
+        for(row = 0; row < a.h; ++row){
+            int base = a.w*(row + a.h*(ch));
+            for(col = 0; col < a.w/2; ++col){
+                int near = col + base;
+                int far = (a.w - col - 1) + base;
+                float held = a.data[far];
+                a.data[far] = a.data[near];
+                a.data[near] = held;
+            }
+        }
+    }
+}
+
+/*
+ * Return a single-channel image holding, for each pixel, the square root
+ * of the summed squared channel differences between `a` and `b`.
+ * Dimensions are taken from `a`; the sum starts from the contents that
+ * make_image() provides (presumably zero -- confirm against make_image).
+ */
+image image_distance(image a, image b)
+{
+    int ch, p;
+    image dist = make_image(a.w, a.h, 1);
+    for(ch = 0; ch < a.c; ++ch){
+        for(p = 0; p < a.h*a.w; ++p){
+            int at = ch*a.h*a.w+p;
+            dist.data[p] += pow(a.data[at]-b.data[at],2);
+        }
+    }
+    for(p = 0; p < a.h*a.w; ++p){
+        dist.data[p] = sqrt(dist.data[p]);
+    }
+    return dist;
+}
+
+/*
+ * Blend `source` into `dest` at offset (dx, dy). The source weight is
+ * 1 - dist/max_dist, where dist is the distance of the pixel from the
+ * source centre and max_dist is derived from the source width only;
+ * negative weights are clamped to 0.
+ */
+void ghost_image(image source, image dest, int dx, int dy)
+{
+    int col, row, ch;
+    float max_dist = sqrt((-source.w/2. + .5)*(-source.w/2. + .5));
+    for(ch = 0; ch < source.c; ++ch){
+        for(row = 0; row < source.h; ++row){
+            double off_y = row - source.h/2. + .5;
+            for(col = 0; col < source.w; ++col){
+                double off_x = col - source.w/2. + .5;
+                float dist = sqrt(off_x*off_x + off_y*off_y);
+                float alpha = (1 - dist/max_dist);
+                if(alpha < 0) alpha = 0;
+                float from = get_pixel(source, col,row,ch);
+                float onto = get_pixel(dest, dx+col,dy+row,ch);
+                float val = alpha*from + (1-alpha)*onto;
+                set_pixel(dest, dx+col, dy+row, ch, val);
+            }
+        }
+    }
+}
+
+/*
+ * Pixelate `im` in place: every pixel takes the value found at the
+ * top-left corner of its s-by-s block.
+ */
+void blocky_image(image im, int s)
+{
+    int ch, row, col;
+    for(ch = 0; ch < im.c; ++ch){
+        for(row = 0; row < im.h; ++row){
+            for(col = 0; col < im.w; ++col){
+                /* Integer division snaps each coordinate to its block origin. */
+                int src_col = col/s*s;
+                int src_row = row/s*s;
+                int dst = col + im.w*(row + im.h*ch);
+                int src = src_col + im.w*(src_row + im.h*ch);
+                im.data[dst] = im.data[src];
+            }
+        }
+    }
+}
+
+/*
+ * Pixelate the rectangle (dx, dy, w, h) of `im` in place using 32-pixel
+ * blocks. Negative dx/dy are raised to 0 and the rectangle is cut off at
+ * the right and bottom image edges.
+ */
+void censor_image(image im, int dx, int dy, int w, int h)
+{
+    const int block = 32;
+    int ch, row, col;
+    if(dx < 0) dx = 0;
+    if(dy < 0) dy = 0;
+
+    for(ch = 0; ch < im.c; ++ch){
+        for(row = dy; row < dy + h && row < im.h; ++row){
+            int src_row = row/block*block;
+            for(col = dx; col < dx + w && col < im.w; ++col){
+                int src_col = col/block*block;
+                im.data[col + im.w*(row + im.h*ch)] = im.data[src_col + im.w*(src_row + im.h*ch)];
+            }
+        }
+    }
+}
+
+/*
+ * Copy every pixel of `source` into `dest`, shifted by (dx, dy).
+ * All writes go through set_pixel.
+ */
+void embed_image(image source, image dest, int dx, int dy)
+{
+    int ch, row, col;
+    for(ch = 0; ch < source.c; ++ch){
+        for(row = 0; row < source.h; ++row){
+            for(col = 0; col < source.w; ++col){
+                set_pixel(dest, dx+col, dy+row, ch, get_pixel(source, col, row, ch));
+            }
+        }
+    }
+}
+
+/*
+ * Stack the channels of `source` vertically into a new single-channel
+ * image, leaving `border` rows between consecutive channels.
+ * The caller frees the result.
+ */
+image collapse_image_layers(image source, int border)
+{
+    int stride = source.h + border;
+    int total_h = stride*source.c - border;
+    image dest = make_image(source.w, total_h, 1);
+    int ch;
+    for(ch = 0; ch < source.c; ++ch){
+        image layer = get_image_layer(source, ch);
+        embed_image(layer, dest, 0, ch*stride);
+        free_image(layer);
+    }
+    return dest;
+}
+
+/* Clamp every value of `im` into [0, 1], in place. */
+void constrain_image(image im)
+{
+    int count = im.w*im.h*im.c;
+    int idx;
+    for(idx = 0; idx < count; ++idx){
+        float v = im.data[idx];
+        if(v < 0) v = 0;
+        else if(v > 1) v = 1;
+        im.data[idx] = v;
+    }
+}
+
+/*
+ * Linearly rescale all values of `p` in place so the global minimum maps
+ * to 0 and the global maximum maps to 1.
+ * If the value range is smaller than 1e-9 (including an empty image), the
+ * data is left unscaled (min = 0, max = 1 is used).
+ */
+void normalize_image(image p)
+{
+    int i;
+    int count = p.h*p.w*p.c;
+    /* Start from the float extremes so data of any magnitude is tracked
+     * correctly; fixed sentinels would give wrong bounds for values beyond
+     * them. */
+    float min = FLT_MAX;
+    float max = -FLT_MAX;
+
+    for(i = 0; i < count; ++i){
+        float v = p.data[i];
+        if(v < min) min = v;
+        if(v > max) max = v;
+    }
+    if(max - min < .000000001){
+        min = 0;
+        max = 1;
+    }
+    for(i = 0; i < count; ++i){
+        p.data[i] = (p.data[i] - min)/(max-min);
+    }
+}
+
+/*
+ * Rescale each channel of `p` independently, in place, so that channel's
+ * minimum maps to 0 and its maximum maps to 1. A channel whose range is
+ * below 1e-9 is left unscaled (min = 0, max = 1 is used).
+ */
+void normalize_image2(image p)
+{
+    int plane = p.h*p.w;
+    int ch, px;
+    for(ch = 0; ch < p.c; ++ch){
+        int off = ch*plane;
+        /* Seed both bounds from the channel's first value. */
+        float lo = p.data[off];
+        float hi = lo;
+        for(px = 0; px < plane; ++px){
+            float v = p.data[off+px];
+            if(v < lo) lo = v;
+            if(v > hi) hi = v;
+        }
+        if(hi - lo < .000000001){
+            lo = 0;
+            hi = 1;
+        }
+        for(px = 0; px < plane; ++px){
+            p.data[off+px] = (p.data[off+px] - lo)/(hi-lo);
+        }
+    }
+}
+
+/*
+ * Copy the pixel data of `src` into the existing buffer of `dest`.
+ * The byte count comes from src's dimensions only.
+ */
+void copy_image_into(image src, image dest)
+{
+    size_t bytes = src.h*src.w*src.c*sizeof(float);
+    memcpy(dest.data, src.data, bytes);
+}
+
+/*
+ * Return a copy of `p` with its own freshly allocated data buffer.
+ * The caller frees the result.
+ */
+image copy_image(image p)
+{
+    image dup = p;
+    dup.data = calloc(p.h*p.w*p.c, sizeof(float));
+    memcpy(dup.data, p.data, p.h*p.w*p.c*sizeof(float));
+    return dup;
+}
+
+/* Swap channel 0 with channel 2 in place (RGB <-> BGR ordering). */
+void rgbgr_image(image im)
+{
+    int plane = im.w*im.h;
+    int px;
+    for(px = 0; px < plane; ++px){
+        float first = im.data[px];
+        im.data[px] = im.data[px + plane*2];
+        im.data[px + plane*2] = first;
+    }
+}
+
+/*
+ * Display `p` under the title `name`.
+ * With OPENCV defined this forwards to show_image_cv and returns its
+ * result. Otherwise the image is written to disk through save_image
+ * (which saves a JPG, so the file is "<name>.jpg") and -1 is returned.
+ */
+int show_image(image p, const char *name, int ms)
+{
+#ifdef OPENCV
+    int c = show_image_cv(p, name, ms);
+    return c;
+#else
+    /* save_image writes JPG, so report the extension that is really used. */
+    fprintf(stderr, "Not compiled with OpenCV, saving to %s.jpg instead\n", name);
+    save_image(p, name);
+    return -1;
+#endif
+}
+
+/*
+ * Write `im` to "<name>.<ext>" in the format selected by `f`
+ * (PNG, BMP, TGA or JPG; any other value falls back to PNG).
+ * `quality` is only used for JPG.
+ * Values are multiplied by 255 and converted to unsigned char without
+ * clamping, and the planar data is interleaved per pixel before writing.
+ * A failure is reported on stderr; nothing is returned to the caller.
+ */
+void save_image_options(image im, const char *name, IMTYPE f, int quality)
+{
+    char buff[256];
+    const char *ext = "png";
+    if(f == BMP)      ext = "bmp";
+    else if(f == TGA) ext = "tga";
+    else if(f == JPG) ext = "jpg";
+    /* snprintf bounds the write, so a long name is truncated instead of
+     * overflowing buff. */
+    snprintf(buff, sizeof(buff), "%s.%s", name, ext);
+
+    int count = im.w*im.h*im.c;
+    unsigned char *data = calloc(count, sizeof(char));
+    if(!data && count > 0){
+        fprintf(stderr, "Failed to write image %s\n", buff);
+        return;
+    }
+    int i,k;
+    for(k = 0; k < im.c; ++k){
+        for(i = 0; i < im.w*im.h; ++i){
+            data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]);
+        }
+    }
+    int success = 0;
+    if(f == BMP)      success = stbi_write_bmp(buff, im.w, im.h, im.c, data);
+    else if(f == TGA) success = stbi_write_tga(buff, im.w, im.h, im.c, data);
+    else if(f == JPG) success = stbi_write_jpg(buff, im.w, im.h, im.c, data, quality);
+    /* PNG is also the fallback for unknown formats, matching the file name
+     * chosen above. */
+    else              success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c);
+    free(data);
+    if(!success) fprintf(stderr, "Failed to write image %s\n", buff);
+}
+
+/* Save `im` as a JPG named after `name`, using quality 80. */
+void save_image(image im, const char *name)
+{
+    const int quality = 80;
+    save_image_options(im, name, JPG, quality);
+}
+
+/*
+ * Show each channel of `p` as its own image, titled
+ * "<name> - Layer <index>".
+ */
+void show_image_layers(image p, char *name)
+{
+    int i;
+    char buff[256];
+    for(i = 0; i < p.c; ++i){
+        /* Bounded formatting: a long name is truncated rather than
+         * overflowing buff. */
+        snprintf(buff, sizeof(buff), "%s - Layer %d", name, i);
+        image layer = get_image_layer(p, i);
+        show_image(layer, buff, 1);
+        free_image(layer);
+    }
+}
+
+/*
+ * Show all channels of `p` stacked vertically in one image, with a
+ * one-row border between channels.
+ */
+void show_image_collapsed(image p, char *name)
+{
+    image stacked = collapse_image_layers(p, 1);
+    show_image(stacked, name, 1);
+    free_image(stacked);
+}
+
+/*
+ * Return an image header with the given dimensions and no pixel buffer
+ * (data is set to 0).
+ */
+image make_empty_image(int w, int h, int c)
+{
+    image out;
+    out.w = w;
+    out.h = h;
+    out.c = c;
+    out.data = 0;
+    return out;
+}
+
+/*
+ * Allocate a zero-filled w x h x c image.
+ * The caller releases it with free_image.
+ */
+image make_image(int w, int h, int c)
+{
+    image fresh = make_empty_image(w,h,c);
+    fresh.data = calloc(h*w*c, sizeof(float));
+    return fresh;
+}
+
+/*
+ * Allocate a w x h x c image and fill every value with
+ * rand_normal() * .25 + .5. The caller frees the result.
+ */
+image make_random_image(int w, int h, int c)
+{
+    image noise = make_empty_image(w,h,c);
+    int count = w*h*c;
+    int idx;
+    noise.data = calloc(h*w*c, sizeof(float));
+    for(idx = 0; idx < count; ++idx){
+        noise.data[idx] = (rand_normal() * .25) + .5;
+    }
+    return noise;
+}
+
+/*
+ * Wrap an existing float buffer as a w x h x c image.
+ * Nothing is copied: the returned image points at `data` directly.
+ */
+image float_to_image(int w, int h, int c, float *data)
+{
+    image view = make_empty_image(w,h,c);
+    view.data = data;
+    return view;
+}
+
+/*
+ * Draw `im`, rescaled to w x h with bilinear interpolation, onto `canvas`
+ * with its top-left corner at (dx, dy).
+ */
+void place_image(image im, int w, int h, int dx, int dy, image canvas)
+{
+    int ch, row, col;
+    for(ch = 0; ch < im.c; ++ch){
+        for(row = 0; row < h; ++row){
+            /* Position in the source image that maps to this output row. */
+            float src_y = ((float)row / h) * im.h;
+            for(col = 0; col < w; ++col){
+                float src_x = ((float)col / w) * im.w;
+                float sample = bilinear_interpolate(im, src_x, src_y, ch);
+                set_pixel(canvas, col + dx, row + dy, ch, sample);
+            }
+        }
+    }
+}
+
+/*
+ * Crop the largest centered square out of `im` and resize it to w x h.
+ * The caller frees the result.
+ */
+image center_crop_image(image im, int w, int h)
+{
+    int side = im.h;
+    if(im.w < im.h) side = im.w;
+    image square = crop_image(im, (im.w - side) / 2, (im.h - side)/2, side, side);
+    image out = resize_image(square, w, h);
+    free_image(square);
+    return out;
+}
+
+/*
+ * Produce a new w x h image sampled from `im` through a combined
+ * rotation (rad), scale (s), aspect stretch and translation (dx, dy)
+ * about the center of `im`. Samples are taken with bilinear_interpolate.
+ * The caller frees the result.
+ */
+image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect)
+{
+    float cx = im.w/2.;
+    float cy = im.h/2.;
+    double cs = cos(rad);
+    double sn = sin(rad);
+    image rot = make_image(w, h, im.c);
+    int ch, row, col;
+    for(ch = 0; ch < im.c; ++ch){
+        for(row = 0; row < h; ++row){
+            /* Output offset from the crop center, in source units. */
+            double v = (row - h/2.)/s + dy/s;
+            for(col = 0; col < w; ++col){
+                double u = (col - w/2.)/s*aspect + dx/s*aspect;
+                float rx = cs*u - sn*v + cx;
+                float ry = sn*u + cs*v + cy;
+                float sample = bilinear_interpolate(im, rx, ry, ch);
+                set_pixel(rot, col, row, ch, sample);
+            }
+        }
+    }
+    return rot;
+}
+
+/*
+ * Return a new image of the same size as `im`, rotated by `rad` radians
+ * about the image center and sampled with bilinear_interpolate.
+ * The caller frees the result.
+ */
+image rotate_image(image im, float rad)
+{
+    float cx = im.w/2.;
+    float cy = im.h/2.;
+    double cs = cos(rad);
+    double sn = sin(rad);
+    image rot = make_image(im.w, im.h, im.c);
+    int ch, row, col;
+    for(ch = 0; ch < im.c; ++ch){
+        for(row = 0; row < im.h; ++row){
+            float oy = row - cy;
+            for(col = 0; col < im.w; ++col){
+                float ox = col - cx;
+                float rx = cs*ox - sn*oy + cx;
+                float ry = sn*ox + cs*oy + cy;
+                float sample = bilinear_interpolate(im, rx, ry, ch);
+                set_pixel(rot, col, row, ch, sample);
+            }
+        }
+    }
+    return rot;
+}
+
+/* Set every value of `m` to `s`, in place. */
+void fill_image(image m, float s)
+{
+    int count = m.h*m.w*m.c;
+    int idx;
+    for(idx = 0; idx < count; ++idx){
+        m.data[idx] = s;
+    }
+}
+
+/* Add `s` to every value of `m`, in place. */
+void translate_image(image m, float s)
+{
+    int count = m.h*m.w*m.c;
+    int idx;
+    for(idx = 0; idx < count; ++idx){
+        m.data[idx] += s;
+    }
+}
+
+/* Multiply every value of `m` by `s`, in place. */
+void scale_image(image m, float s)
+{
+    int count = m.h*m.w*m.c;
+    int idx;
+    for(idx = 0; idx < count; ++idx){
+        m.data[idx] *= s;
+    }
+}
+
+/*
+ * Return a new w x h image taken from `im` starting at (dx, dy).
+ * Source coordinates are clamped to the image, so areas outside `im`
+ * repeat its edge pixels. The caller frees the result.
+ */
+image crop_image(image im, int dx, int dy, int w, int h)
+{
+    image cropped = make_image(w, h, im.c);
+    int ch, row, col;
+    for(ch = 0; ch < im.c; ++ch){
+        for(row = 0; row < h; ++row){
+            for(col = 0; col < w; ++col){
+                int src_row = constrain_int(row + dy, 0, im.h-1);
+                int src_col = constrain_int(col + dx, 0, im.w-1);
+                set_pixel(cropped, col, row, ch, get_pixel(im, src_col, src_row, ch));
+            }
+        }
+    }
+    return cropped;
+}
+
+/*
+ * Bisect the range [min, max] for a vertical shift of `b` that brings it
+ * close to `a`. At each step the midpoint shift and the next one are
+ * compared with dist_array and the half with the smaller distance is kept.
+ * Returns the shift left once the range is a single value.
+ */
+int best_3d_shift_r(image a, image b, int min, int max)
+{
+    while(min != max){
+        int mid = floor((min + max) / 2.);
+        image at_mid = crop_image(b, 0, mid, b.w, b.h);
+        image past_mid = crop_image(b, 0, mid+1, b.w, b.h);
+        float d_mid = dist_array(at_mid.data, a.data, a.w*a.h*a.c, 10);
+        float d_past = dist_array(past_mid.data, a.data, a.w*a.h*a.c, 10);
+        free_image(at_mid);
+        free_image(past_mid);
+        if(d_mid < d_past){
+            max = mid;
+        } else {
+            min = mid+1;
+        }
+    }
+    return min;
+}
+
+/*
+ * Try every second vertical shift in [min, max], print each shift with
+ * its dist_array distance to `a`, and return the shift with the smallest
+ * distance (0 if no shift was tried).
+ */
+int best_3d_shift(image a, image b, int min, int max)
+{
+    int winner = 0;
+    float lowest = FLT_MAX;
+    int shift;
+    for(shift = min; shift <= max; shift += 2){
+        image moved = crop_image(b, 0, shift, b.w, b.h);
+        float d = dist_array(moved.data, a.data, a.w*a.h*a.c, 100);
+        if(d < lowest){
+            lowest = d;
+            winner = shift;
+        }
+        printf("%d %f\n", shift, d);
+        free_image(moved);
+    }
+    return winner;
+}
+
+/*
+ * Load images f1 and f2, find a vertical shift of the second against the
+ * first with best_3d_shift_r, crop the second image at (delta, shift) to
+ * the size of the first, overwrite the crop's first channel with the
+ * first channel of the first image, and save the result under `out`
+ * ("out" when `out` is null).
+ * All images created here are released before returning.
+ */
+void composite_3d(char *f1, char *f2, char *out, int delta)
+{
+    if(!out) out = "out";
+    image a = load_image(f1, 0,0,0);
+    image b = load_image(f2, 0,0,0);
+    int shift = best_3d_shift_r(a, b, -a.h/100, a.h/100);
+
+    image c1 = crop_image(b, 10, shift, b.w, b.h);
+    float d1 = dist_array(c1.data, a.data, a.w*a.h*a.c, 100);
+    image c2 = crop_image(b, -10, shift, b.w, b.h);
+    float d2 = dist_array(c2.data, a.data, a.w*a.h*a.c, 100);
+    /* The probe crops are only needed for the two distances above. */
+    free_image(c1);
+    free_image(c2);
+
+    /* The "&& 0" keeps the swap branch disabled, as in the original. */
+    if(d2 < d1 && 0){
+        image swap = a;
+        a = b;
+        b = swap;
+        shift = -shift;
+        printf("swapped, %d\n", shift);
+    }
+    else{
+        printf("%d\n", shift);
+    }
+
+    image c = crop_image(b, delta, shift, a.w, a.h);
+    int i;
+    for(i = 0; i < c.w*c.h; ++i){
+        c.data[i] = a.data[i];
+    }
+    save_image(c, out);
+
+    free_image(c);
+    free_image(a);
+    free_image(b);
+}
+
+/*
+ * Resize `im` to fit inside w x h while keeping its aspect ratio, then
+ * embed it centered into the existing image `boxed`.
+ * Pixels of `boxed` outside the embedded area are not touched.
+ */
+void letterbox_image_into(image im, int w, int h, image boxed)
+{
+    int fit_w, fit_h;
+    /* Use whichever axis needs the smaller scale factor as the limit. */
+    if (((float)w/im.w) < ((float)h/im.h)) {
+        fit_w = w;
+        fit_h = (im.h * w)/im.w;
+    } else {
+        fit_h = h;
+        fit_w = (im.w * h)/im.h;
+    }
+    image scaled = resize_image(im, fit_w, fit_h);
+    embed_image(scaled, boxed, (w-fit_w)/2, (h-fit_h)/2);
+    free_image(scaled);
+}
+
+/*
+ * Return a new w x h image filled with .5 that holds `im`, resized to fit
+ * while keeping its aspect ratio, centered inside it.
+ * The caller frees the result.
+ */
+image letterbox_image(image im, int w, int h)
+{
+    int fit_w, fit_h;
+    /* Use whichever axis needs the smaller scale factor as the limit. */
+    if (((float)w/im.w) < ((float)h/im.h)) {
+        fit_w = w;
+        fit_h = (im.h * w)/im.w;
+    } else {
+        fit_h = h;
+        fit_w = (im.w * h)/im.h;
+    }
+    image scaled = resize_image(im, fit_w, fit_h);
+    image boxed = make_image(w, h, im.c);
+    fill_image(boxed, .5);
+    embed_image(scaled, boxed, (w-fit_w)/2, (h-fit_h)/2);
+    free_image(scaled);
+    return boxed;
+}
+
+/*
+ * Resize `im` so that its longer side equals `max`, keeping the aspect
+ * ratio. If the size would not change, `im` itself is returned (no copy);
+ * otherwise a newly allocated image is returned.
+ */
+image resize_max(image im, int max)
+{
+    int new_w, new_h;
+    if(im.w > im.h){
+        new_w = max;
+        new_h = (im.h * max) / im.w;
+    } else {
+        new_h = max;
+        new_w = (im.w * max) / im.h;
+    }
+    if(new_w == im.w && new_h == im.h) return im;
+    return resize_image(im, new_w, new_h);
+}
+
+/*
+ * Resize `im` so that its shorter side equals `min`, keeping the aspect
+ * ratio. If the size would not change, `im` itself is returned (no copy);
+ * otherwise a newly allocated image is returned.
+ */
+image resize_min(image im, int min)
+{
+    int new_w, new_h;
+    if(im.w < im.h){
+        new_w = min;
+        new_h = (im.h * min) / im.w;
+    } else {
+        new_h = min;
+        new_w = (im.w * min) / im.h;
+    }
+    if(new_w == im.w && new_h == im.h) return im;
+    return resize_image(im, new_w, new_h);
+}
+
+/*
+ * Return a new w x h crop of `im` whose offset is drawn with rand_int
+ * from [0, im.w - w] horizontally and [0, im.h - h] vertically.
+ */
+image random_crop_image(image im, int w, int h)
+{
+    int off_x = rand_int(0, im.w - w);
+    int off_y = rand_int(0, im.h - h);
+    return crop_image(im, off_x, off_y, w, h);
+}
+
+/*
+ * Draw random augmentation parameters for producing a w x h crop of `im`.
+ *   angle     - rotation is uniform in [-angle, angle] degrees, stored in
+ *               radians
+ *   aspect    - passed through rand_scale to get the aspect factor
+ *   low, high - range for rand_int; the result divided by the smaller of
+ *               im.h and im.w*aspect gives the scale
+ * The translation (dx, dy) is uniform within the slack between the scaled
+ * image and the crop on each axis.
+ */
+augment_args random_augment_args(image im, float angle, float aspect, int low, int high, int w, int h)
+{
+    augment_args a = {0};
+    aspect = rand_scale(aspect);
+    int r = rand_int(low, high);
+    int min = (im.h < im.w*aspect) ? im.h : im.w*aspect;
+    float scale = (float)r / min;
+
+    float rad = rand_uniform(-angle, angle) * TWO_PI / 360.;
+
+    float dx = (im.w*scale/aspect - w) / 2.;
+    /* Vertical slack is measured against the crop height, not its width. */
+    float dy = (im.h*scale - h) / 2.;
+    dx = rand_uniform(-dx, dx);
+    dy = rand_uniform(-dy, dy);
+
+    a.rad = rad;
+    a.scale = scale;
+    a.w = w;
+    a.h = h;
+    a.dx = dx;
+    a.dy = dy;
+    a.aspect = aspect;
+    return a;
+}
+
+/*
+ * Draw random augmentation parameters with random_augment_args and apply
+ * them to `im` through rotate_crop_image. The caller frees the result.
+ */
+image random_augment_image(image im, float angle, float aspect, int low, int high, int w, int h)
+{
+    augment_args args = random_augment_args(im, angle, aspect, low, high, w, h);
+    return rotate_crop_image(im, args.rad, args.scale, args.w, args.h, args.dx, args.dy, args.aspect);
+}
+
+/* Return the largest of the three values. */
+float three_way_max(float a, float b, float c)
+{
+    float larger = (a > b) ? a : b;
+    return (larger > c) ? larger : c;
+}
+
+/* Return the smallest of the three values. */
+float three_way_min(float a, float b, float c)
+{
+    float smaller = (a < b) ? a : b;
+    return (smaller < c) ? smaller : c;
+}
+
+/*
+ * Convert a 3-channel image in place, reading channels 0/1/2 as Y/U/V
+ * and writing them back as R/G/B.
+ */
+void yuv_to_rgb(image im)
+{
+    assert(im.c == 3);
+    int row, col;
+    for(row = 0; row < im.h; ++row){
+        for(col = 0; col < im.w; ++col){
+            float y = get_pixel(im, col, row, 0);
+            float u = get_pixel(im, col, row, 1);
+            float v = get_pixel(im, col, row, 2);
+
+            float r = y + 1.13983*v;
+            float g = y + -.39465*u + -.58060*v;
+            float b = y + 2.03211*u;
+
+            set_pixel(im, col, row, 0, r);
+            set_pixel(im, col, row, 1, g);
+            set_pixel(im, col, row, 2, b);
+        }
+    }
+}
+
+/*
+ * Convert a 3-channel image in place, reading channels 0/1/2 as R/G/B
+ * and writing them back as Y/U/V.
+ */
+void rgb_to_yuv(image im)
+{
+    assert(im.c == 3);
+    int row, col;
+    for(row = 0; row < im.h; ++row){
+        for(col = 0; col < im.w; ++col){
+            float r = get_pixel(im, col, row, 0);
+            float g = get_pixel(im, col, row, 1);
+            float b = get_pixel(im, col, row, 2);
+
+            float y = .299*r + .587*g + .114*b;
+            float u = -.14713*r + -.28886*g + .436*b;
+            float v = .615*r + -.51499*g + -.10001*b;
+
+            set_pixel(im, col, row, 0, y);
+            set_pixel(im, col, row, 1, u);
+            set_pixel(im, col, row, 2, v);
+        }
+    }
+}
+
+// http://www.cs.rit.edu/~ncs/color/t_convert.html
+/*
+ * Convert a 3-channel image in place from R/G/B to H/S/V.
+ * Hue is stored divided by 6 (so it lies in [0, 1)), saturation is
+ * delta/max and value is the largest component.
+ * Pixels with max == 0 get h = s = 0. Gray pixels (all components equal)
+ * get h = 0 instead of the NaN that 0/0 would otherwise produce.
+ */
+void rgb_to_hsv(image im)
+{
+    assert(im.c == 3);
+    int i, j;
+    float r, g, b;
+    float h, s, v;
+    for(j = 0; j < im.h; ++j){
+        for(i = 0; i < im.w; ++i){
+            r = get_pixel(im, i , j, 0);
+            g = get_pixel(im, i , j, 1);
+            b = get_pixel(im, i , j, 2);
+            float max = three_way_max(r,g,b);
+            float min = three_way_min(r,g,b);
+            float delta = max - min;
+            v = max;
+            if(max == 0){
+                s = 0;
+                h = 0;
+            }else{
+                s = delta/max;
+                if(delta == 0){
+                    /* Hue is undefined for gray; use 0 to avoid 0/0. */
+                    h = 0;
+                } else if(r == max){
+                    h = (g - b) / delta;
+                } else if (g == max) {
+                    h = 2 + (b - r) / delta;
+                } else {
+                    h = 4 + (r - g) / delta;
+                }
+                if (h < 0) h += 6;
+                h = h/6.;
+            }
+            set_pixel(im, i, j, 0, h);
+            set_pixel(im, i, j, 1, s);
+            set_pixel(im, i, j, 2, v);
+        }
+    }
+}
+
+/*
+ * Convert a 3-channel image in place from H/S/V to R/G/B.
+ * The stored hue is multiplied by 6 and its integer part selects one of
+ * six sectors; a saturation of exactly 0 yields r = g = b = v.
+ */
+void hsv_to_rgb(image im)
+{
+    assert(im.c == 3);
+    int row, col;
+    for(row = 0; row < im.h; ++row){
+        for(col = 0; col < im.w; ++col){
+            float h = 6 * get_pixel(im, col, row, 0);
+            float s = get_pixel(im, col, row, 1);
+            float v = get_pixel(im, col, row, 2);
+            float r, g, b;
+            if (s == 0) {
+                r = g = b = v;
+            } else {
+                int sector = floor(h);
+                float f = h - sector;
+                float p = v*(1-s);
+                float q = v*(1-s*f);
+                float t = v*(1-s*(1-f));
+                switch(sector){
+                    case 0:  r = v; g = t; b = p; break;
+                    case 1:  r = q; g = v; b = p; break;
+                    case 2:  r = p; g = v; b = t; break;
+                    case 3:  r = p; g = q; b = v; break;
+                    case 4:  r = t; g = p; b = v; break;
+                    default: r = v; g = p; b = q; break;
+                }
+            }
+            set_pixel(im, col, row, 0, r);
+            set_pixel(im, col, row, 1, g);
+            set_pixel(im, col, row, 2, b);
+        }
+    }
+}
+
+/*
+ * Turn a 3-channel image gray in place: each pixel's weighted sum
+ * (0.299, 0.587, 0.114 over channels 0, 1, 2) is written to all three
+ * channels.
+ */
+void grayscale_image_3c(image im)
+{
+    assert(im.c == 3);
+    float weight[] = {0.299, 0.587, 0.114};
+    int plane = im.h*im.w;
+    int row, col, ch;
+    for(row = 0; row < im.h; ++row){
+        for(col = 0; col < im.w; ++col){
+            int idx = im.w*row + col;
+            float luma = 0;
+            for(ch = 0; ch < 3; ++ch){
+                luma += weight[ch]*get_pixel(im, col, row, ch);
+            }
+            im.data[0*plane + idx] = luma;
+            im.data[1*plane + idx] = luma;
+            im.data[2*plane + idx] = luma;
+        }
+    }
+}
+
+/*
+ * Return a new single-channel image holding the weighted sum
+ * (0.299, 0.587, 0.114) of the three channels of `im`.
+ * The caller frees the result.
+ */
+image grayscale_image(image im)
+{
+    assert(im.c == 3);
+    float weight[] = {0.299, 0.587, 0.114};
+    image gray = make_image(im.w, im.h, 1);
+    int row, col, ch;
+    for(row = 0; row < im.h; ++row){
+        for(col = 0; col < im.w; ++col){
+            int idx = col + im.w*row;
+            /* Channels are accumulated in ascending order. */
+            for(ch = 0; ch < im.c; ++ch){
+                gray.data[idx] += weight[ch]*get_pixel(im, col, row, ch);
+            }
+        }
+    }
+    return gray;
+}
+
+/*
+ * Return a new image of the same shape as `im` whose values are 1 where
+ * the source is strictly greater than `thresh` and 0 elsewhere.
+ */
+image threshold_image(image im, float thresh)
+{
+    int count = im.w*im.h*im.c;
+    image mask = make_image(im.w, im.h, im.c);
+    int idx;
+    for(idx = 0; idx < count; ++idx){
+        if(im.data[idx] > thresh){
+            mask.data[idx] = 1;
+        } else {
+            mask.data[idx] = 0;
+        }
+    }
+    return mask;
+}
+
+/*
+ * Return a new image equal to alpha*fore + (1 - alpha)*back.
+ * Both inputs must have identical dimensions (asserted).
+ */
+image blend_image(image fore, image back, float alpha)
+{
+    assert(fore.w == back.w && fore.h == back.h && fore.c == back.c);
+    image blend = make_image(fore.w, fore.h, fore.c);
+    int ch, row, col;
+    for(ch = 0; ch < fore.c; ++ch){
+        for(row = 0; row < fore.h; ++row){
+            for(col = 0; col < fore.w; ++col){
+                float top = get_pixel(fore, col, row, ch);
+                float bottom = get_pixel(back, col, row, ch);
+                float mixed = alpha * top + (1 - alpha)* bottom;
+                set_pixel(blend, col, row, ch, mixed);
+            }
+        }
+    }
+    return blend;
+}
+
+/* Multiply every pixel of channel `c` by `v`, in place. */
+void scale_image_channel(image im, int c, float v)
+{
+    int row, col;
+    for(row = 0; row < im.h; ++row){
+        for(col = 0; col < im.w; ++col){
+            float scaled = get_pixel(im, col, row, c) * v;
+            set_pixel(im, col, row, c, scaled);
+        }
+    }
+}
+
+/* Add `v` to every pixel of channel `c`, in place. */
+void translate_image_channel(image im, int c, float v)
+{
+    int row, col;
+    for(row = 0; row < im.h; ++row){
+        for(col = 0; col < im.w; ++col){
+            float shifted = get_pixel(im, col, row, c) + v;
+            set_pixel(im, col, row, c, shifted);
+        }
+    }
+}
+
+/*
+ * Return a copy of `im` in which values strictly greater than .5 become 1
+ * and all others become 0. The caller frees the result.
+ */
+image binarize_image(image im)
+{
+    image out = copy_image(im);
+    int count = im.w * im.h * im.c;
+    int idx;
+    for(idx = 0; idx < count; ++idx){
+        out.data[idx] = (out.data[idx] > .5) ? 1 : 0;
+    }
+    return out;
+}
+
+/*
+ * Scale the saturation of `im` by `sat`, in place.
+ * The image is converted to HSV, channel 1 is scaled, and the result is
+ * converted back to RGB and clamped to [0, 1].
+ */
+void saturate_image(image im, float sat)
+{
+    const int saturation_channel = 1;
+    rgb_to_hsv(im);
+    scale_image_channel(im, saturation_channel, sat);
+    hsv_to_rgb(im);
+    constrain_image(im);
+}
+
+/*
+ * Shift the hue of `im` by `hue`, in place.
+ * The shift is applied to channel 0 in HSV space; results above 1 are
+ * reduced by 1 and results below 0 are raised by 1. The image is then
+ * converted back to RGB and clamped to [0, 1].
+ */
+void hue_image(image im, float hue)
+{
+    int plane = im.w*im.h;
+    int px;
+    rgb_to_hsv(im);
+    for(px = 0; px < plane; ++px){
+        float shifted = im.data[px] + hue;
+        if (shifted > 1) shifted -= 1;
+        if (shifted < 0) shifted += 1;
+        im.data[px] = shifted;
+    }
+    hsv_to_rgb(im);
+    constrain_image(im);
+}
+
+/*
+ * Scale the HSV value channel (channel 2) of `im` by `sat`, in place,
+ * then convert back to RGB and clamp to [0, 1].
+ */
+void exposure_image(image im, float sat)
+{
+    const int value_channel = 2;
+    rgb_to_hsv(im);
+    scale_image_channel(im, value_channel, sat);
+    hsv_to_rgb(im);
+    constrain_image(im);
+}
+
+/*
+ * Apply a hue shift plus saturation and value scaling to `im`, in place.
+ * In HSV space channel 1 is scaled by `sat`, channel 2 by `val`, and
+ * `hue` is added to channel 0 (results above 1 are reduced by 1, results
+ * below 0 are raised by 1). The image is then converted back to RGB and
+ * clamped to [0, 1].
+ */
+void distort_image(image im, float hue, float sat, float val)
+{
+    int plane = im.w*im.h;
+    int px;
+    rgb_to_hsv(im);
+    scale_image_channel(im, 1, sat);
+    scale_image_channel(im, 2, val);
+    for(px = 0; px < plane; ++px){
+        float shifted = im.data[px] + hue;
+        if (shifted > 1) shifted -= 1;
+        if (shifted < 0) shifted += 1;
+        im.data[px] = shifted;
+    }
+    hsv_to_rgb(im);
+    constrain_image(im);
+}
+
+/*
+ * Distort `im` in place with random parameters: a hue shift uniform in
+ * [-hue, hue], and saturation and exposure factors drawn with rand_scale.
+ * The three draws happen in that order.
+ */
+void random_distort_image(image im, float hue, float saturation, float exposure)
+{
+    float hue_shift = rand_uniform(-hue, hue);
+    float sat_factor = rand_scale(saturation);
+    float exp_factor = rand_scale(exposure);
+    distort_image(im, hue_shift, sat_factor, exp_factor);
+}
+
+/*
+ * Scale saturation (HSV channel 1) by `sat` and value (HSV channel 2) by
+ * `exposure`, in place, then convert back to RGB and clamp to [0, 1].
+ */
+void saturate_exposure_image(image im, float sat, float exposure)
+{
+    const int saturation_channel = 1;
+    const int value_channel = 2;
+    rgb_to_hsv(im);
+    scale_image_channel(im, saturation_channel, sat);
+    scale_image_channel(im, value_channel, exposure);
+    hsv_to_rgb(im);
+    constrain_image(im);
+}
+
+/*
+ * Return a new w x h image produced from `im` by separable linear
+ * interpolation: first along rows into a temporary w x im.h image, then
+ * along columns into the result. The caller frees the result.
+ *
+ * The last output column and the last output row are copied from the last
+ * source column/row directly. This also covers w == 1, h == 1, im.w == 1
+ * and im.h == 1, so no scale factor is ever built from a division by zero
+ * and the last row does not depend on float rounding of r*h_scale.
+ */
+image resize_image(image im, int w, int h)
+{
+    image resized = make_image(w, h, im.c);
+    image part = make_image(w, im.h, im.c);
+    int r, c, k;
+    /* A one-pixel output axis has no interior samples; its scale is unused. */
+    float w_scale = (w > 1) ? (float)(im.w - 1) / (w - 1) : 0;
+    float h_scale = (h > 1) ? (float)(im.h - 1) / (h - 1) : 0;
+    /* Horizontal pass: im -> part. */
+    for(k = 0; k < im.c; ++k){
+        for(r = 0; r < im.h; ++r){
+            for(c = 0; c < w; ++c){
+                float val = 0;
+                if(c == w-1 || im.w == 1){
+                    val = get_pixel(im, im.w-1, r, k);
+                } else {
+                    float sx = c*w_scale;
+                    int ix = (int) sx;
+                    float dx = sx - ix;
+                    val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k);
+                }
+                set_pixel(part, c, r, k, val);
+            }
+        }
+    }
+    /* Vertical pass: part -> resized. */
+    for(k = 0; k < im.c; ++k){
+        for(r = 0; r < h; ++r){
+            if(r == h-1 || im.h == 1){
+                for(c = 0; c < w; ++c){
+                    set_pixel(resized, c, r, k, get_pixel(part, c, im.h-1, k));
+                }
+                continue;
+            }
+            float sy = r*h_scale;
+            int iy = (int) sy;
+            float dy = sy - iy;
+            for(c = 0; c < w; ++c){
+                float val = (1-dy) * get_pixel(part, c, iy, k);
+                set_pixel(resized, c, r, k, val);
+            }
+            for(c = 0; c < w; ++c){
+                float val = dy * get_pixel(part, c, iy+1, k);
+                add_pixel(resized, c, r, k, val);
+            }
+        }
+    }
+
+    free_image(part);
+    return resized;
+}
+
+
+/*
+ * Manual visual check: load `filename` as 3 channels, print its L2 norm,
+ * and show the original, a grayscale version and four fixed distortions.
+ * With OPENCV defined it then loops forever showing random augmentations
+ * and random distortions. Without OPENCV the images created here are
+ * freed before returning.
+ */
+void test_resize(char *filename)
+{
+    image im = load_image(filename, 0,0, 3);
+    float mag = mag_array(im.data, im.w*im.h*im.c);
+    printf("L2 Norm: %f\n", mag);
+    image gray = grayscale_image(im);
+
+    image c1 = copy_image(im);
+    image c2 = copy_image(im);
+    image c3 = copy_image(im);
+    image c4 = copy_image(im);
+    distort_image(c1, .1, 1.5, 1.5);
+    distort_image(c2, -.1, .66666, .66666);
+    distort_image(c3, .1, 1.5, .66666);
+    distort_image(c4, .1, .66666, 1.5);
+
+
+    show_image(im,   "Original", 1);
+    show_image(gray, "Gray", 1);
+    show_image(c1, "C1", 1);
+    show_image(c2, "C2", 1);
+    show_image(c3, "C3", 1);
+    show_image(c4, "C4", 1);
+#ifdef OPENCV
+    while(1){
+        image aug = random_augment_image(im, 0, .75, 320, 448, 320, 320);
+        show_image(aug, "aug", 1);
+        free_image(aug);
+
+
+        float exposure = 1.15;
+        float saturation = 1.15;
+        float hue = .05;
+
+        image c = copy_image(im);
+
+        float dexp = rand_scale(exposure);
+        float dsat = rand_scale(saturation);
+        float dhue = rand_uniform(-hue, hue);
+
+        distort_image(c, dhue, dsat, dexp);
+        show_image(c, "rand", 1);
+        printf("%f %f %f\n", dhue, dsat, dexp);
+        free_image(c);
+    }
+#else
+    /* No display loop in this build: release everything created above. */
+    free_image(c4);
+    free_image(c3);
+    free_image(c2);
+    free_image(c1);
+    free_image(gray);
+    free_image(im);
+#endif
+}
+
+
+/*
+ * Load `filename` through stbi_load and return it as a planar float image
+ * with values divided by 255. `channels` is passed to stbi_load; when it
+ * is non-zero it is also used as the channel count of the result.
+ * On load failure the reason is printed to stderr and the process exits.
+ */
+image load_image_stb(char *filename, int channels)
+{
+    int w, h, c;
+    unsigned char *bytes = stbi_load(filename, &w, &h, &c, channels);
+    if (!bytes) {
+        fprintf(stderr, "Cannot load image \"%s\"\nSTB Reason: %s\n", filename, stbi_failure_reason());
+        exit(0);
+    }
+    if(channels) c = channels;
+    image im = make_image(w, h, c);
+    int row, col, ch;
+    /* Walk the interleaved source and scatter it into per-channel planes. */
+    for(row = 0; row < h; ++row){
+        for(col = 0; col < w; ++col){
+            for(ch = 0; ch < c; ++ch){
+                int planar = col + w*row + w*h*ch;
+                int packed = ch + c*col + c*w*row;
+                im.data[planar] = (float)bytes[packed]/255.;
+            }
+        }
+    }
+    free(bytes);
+    return im;
+}
+
+/*
+ * Load `filename` with `c` channels, using load_image_cv when OPENCV is
+ * defined and load_image_stb otherwise. If both w and h are non-zero and
+ * differ from the loaded size, the image is resized to w x h.
+ * The caller frees the result.
+ */
+image load_image(char *filename, int w, int h, int c)
+{
+#ifdef OPENCV
+    image out = load_image_cv(filename, c);
+#else
+    image out = load_image_stb(filename, c);
+#endif
+
+    int size_requested = (h && w);
+    int size_differs = (h != out.h || w != out.w);
+    if(size_requested && size_differs){
+        image scaled = resize_image(out, w, h);
+        free_image(out);
+        out = scaled;
+    }
+    return out;
+}
+
+/* Load `filename` as a 3-channel image, resized to w x h when both are non-zero. */
+image load_image_color(char *filename, int w, int h)
+{
+    const int channels = 3;
+    return load_image(filename, w, h, channels);
+}
+
+/*
+ * Return a new single-channel image holding a copy of channel `l` of `m`.
+ * The caller frees the result.
+ */
+image get_image_layer(image m, int l)
+{
+    int plane = m.h*m.w;
+    int off = l*plane;
+    image layer = make_image(m.w, m.h, 1);
+    int px;
+    for(px = 0; px < plane; ++px){
+        layer.data[px] = m.data[px + off];
+    }
+    return layer;
+}
+/*
+ * Print the values of `m` to stdout, one text row per image row and a
+ * blank line after each channel. Output stops after column index 31 in a
+ * row and after row index 31 in a channel.
+ */
+void print_image(image m)
+{
+    int ch, row, col;
+    for(ch = 0; ch < m.c; ++ch){
+        int plane_off = ch*m.h*m.w;
+        for(row = 0; row < m.h; ++row){
+            int row_off = plane_off + row*m.w;
+            for(col = 0; col < m.w; ++col){
+                printf("%.2lf, ", m.data[row_off + col]);
+                if(col > 30) break;
+            }
+            printf("\n");
+            if(row > 30) break;
+        }
+        printf("\n");
+    }
+    printf("\n");
+}
+
+/*
+ * Tile `n` images into one new image, one below the other, with a
+ * one-pixel border between tiles. Tile size comes from ims[0].
+ * 3-channel inputs are embedded as color; for any other channel count the
+ * channels of each image are laid out side by side in a single-channel
+ * result. The caller frees the result.
+ */
+image collapse_images_vert(image *ims, int n)
+{
+    const int color = 1;
+    const int border = 1;
+    int w = ims[0].w;
+    int h = (ims[0].h + border) * n - border;
+    int c = ims[0].c;
+    int as_color = (c == 3 && color);
+    if(!as_color){
+        /* Make room for every channel next to each other. */
+        w = (w+border)*c - border;
+        c = 1;
+    }
+
+    image filters = make_image(w, h, c);
+    int idx, ch;
+    for(idx = 0; idx < n; ++idx){
+        int h_offset = idx*(ims[0].h+border);
+        image copy = copy_image(ims[idx]);
+        if(as_color){
+            embed_image(copy, filters, 0, h_offset);
+        } else {
+            for(ch = 0; ch < copy.c; ++ch){
+                int w_offset = ch*(ims[0].w+border);
+                image layer = get_image_layer(copy, ch);
+                embed_image(layer, filters, w_offset, h_offset);
+                free_image(layer);
+            }
+        }
+        free_image(copy);
+    }
+    return filters;
+}
+
+/*
+ * Tile `n` images into one new image, side by side, with a one-pixel
+ * border between tiles. Tile size comes from ims[0].
+ * 3-channel inputs are embedded as color; for any other channel count the
+ * channels of each image are stacked vertically in a single-channel
+ * result. The caller frees the result.
+ */
+image collapse_images_horz(image *ims, int n)
+{
+    int color = 1;
+    int border = 1;
+    int h,w,c;
+    int tile_w = ims[0].w;
+    int tile_h = ims[0].h;
+    h = tile_h;
+    w = (tile_w + border) * n - border;
+    c = ims[0].c;
+    if(c != 3 || !color){
+        h = (h+border)*c - border;
+        c = 1;
+    }
+
+    image filters = make_image(w, h, c);
+    int i,j;
+    for(i = 0; i < n; ++i){
+        /* Step by the tile width so non-square tiles land where the
+         * canvas width computed above expects them. */
+        int w_offset = i*(tile_w+border);
+        image copy = copy_image(ims[i]);
+        if(c == 3 && color){
+            embed_image(copy, filters, w_offset, 0);
+        }
+        else{
+            for(j = 0; j < copy.c; ++j){
+                int h_offset = j*(tile_h+border);
+                image layer = get_image_layer(copy, j);
+                embed_image(layer, filters, w_offset, h_offset);
+                free_image(layer);
+            }
+        }
+        free_image(copy);
+    }
+    return filters;
+}
+
+/*
+ * Show a normalized copy of `im` under the title `name`.
+ * `im` itself is not modified.
+ */
+void show_image_normalized(image im, const char *name)
+{
+    image scaled = copy_image(im);
+    normalize_image(scaled);
+    show_image(scaled, name, 1);
+    free_image(scaled);
+}
+
+/*
+ * Tile `n` images vertically, normalize the tiled image, save it under
+ * the name `window` and then show it with that title.
+ */
+void show_images(image *ims, int n, char *window)
+{
+    image tiled = collapse_images_vert(ims, n);
+    normalize_image(tiled);
+    save_image(tiled, window);
+    show_image(tiled, window, 1);
+    free_image(tiled);
+}
+
+/*
+ * Release the pixel buffer of `m`.
+ * An image without a buffer (data == 0) is accepted, since free() ignores
+ * a null pointer.
+ */
+void free_image(image m)
+{
+    free(m.data);
+}