yolo.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327
  1. #include "darknet.h"
  2. char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};
  3. void train_yolo(char *cfgfile, char *weightfile)
  4. {
  5. char *train_images = "/data/voc/train.txt";
  6. char *backup_directory = "/home/pjreddie/backup/";
  7. srand(time(0));
  8. char *base = basecfg(cfgfile);
  9. printf("%s\n", base);
  10. float avg_loss = -1;
  11. network *net = load_network(cfgfile, weightfile, 0);
  12. printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
  13. int imgs = net->batch*net->subdivisions;
  14. int i = *net->seen/imgs;
  15. data train, buffer;
  16. layer l = net->layers[net->n - 1];
  17. int side = l.side;
  18. int classes = l.classes;
  19. float jitter = l.jitter;
  20. list *plist = get_paths(train_images);
  21. //int N = plist->size;
  22. char **paths = (char **)list_to_array(plist);
  23. load_args args = {0};
  24. args.w = net->w;
  25. args.h = net->h;
  26. args.paths = paths;
  27. args.n = imgs;
  28. args.m = plist->size;
  29. args.classes = classes;
  30. args.jitter = jitter;
  31. args.num_boxes = side;
  32. args.d = &buffer;
  33. args.type = REGION_DATA;
  34. args.angle = net->angle;
  35. args.exposure = net->exposure;
  36. args.saturation = net->saturation;
  37. args.hue = net->hue;
  38. pthread_t load_thread = load_data_in_thread(args);
  39. clock_t time;
  40. //while(i*imgs < N*120){
  41. while(get_current_batch(net) < net->max_batches){
  42. i += 1;
  43. time=clock();
  44. pthread_join(load_thread, 0);
  45. train = buffer;
  46. load_thread = load_data_in_thread(args);
  47. printf("Loaded: %lf seconds\n", sec(clock()-time));
  48. time=clock();
  49. float loss = train_network(net, train);
  50. if (avg_loss < 0) avg_loss = loss;
  51. avg_loss = avg_loss*.9 + loss*.1;
  52. printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs);
  53. if(i%1000==0 || (i < 1000 && i%100 == 0)){
  54. char buff[256];
  55. sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
  56. save_weights(net, buff);
  57. }
  58. free_data(train);
  59. }
  60. char buff[256];
  61. sprintf(buff, "%s/%s_final.weights", backup_directory, base);
  62. save_weights(net, buff);
  63. }
  64. void print_yolo_detections(FILE **fps, char *id, int total, int classes, int w, int h, detection *dets)
  65. {
  66. int i, j;
  67. for(i = 0; i < total; ++i){
  68. float xmin = dets[i].bbox.x - dets[i].bbox.w/2.;
  69. float xmax = dets[i].bbox.x + dets[i].bbox.w/2.;
  70. float ymin = dets[i].bbox.y - dets[i].bbox.h/2.;
  71. float ymax = dets[i].bbox.y + dets[i].bbox.h/2.;
  72. if (xmin < 0) xmin = 0;
  73. if (ymin < 0) ymin = 0;
  74. if (xmax > w) xmax = w;
  75. if (ymax > h) ymax = h;
  76. for(j = 0; j < classes; ++j){
  77. if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j],
  78. xmin, ymin, xmax, ymax);
  79. }
  80. }
  81. }
  82. void validate_yolo(char *cfg, char *weights)
  83. {
  84. network *net = load_network(cfg, weights, 0);
  85. set_batch_network(net, 1);
  86. fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
  87. srand(time(0));
  88. char *base = "results/comp4_det_test_";
  89. //list *plist = get_paths("data/voc.2007.test");
  90. list *plist = get_paths("/home/pjreddie/data/voc/2007_test.txt");
  91. //list *plist = get_paths("data/voc.2012.test");
  92. char **paths = (char **)list_to_array(plist);
  93. layer l = net->layers[net->n-1];
  94. int classes = l.classes;
  95. int j;
  96. FILE **fps = calloc(classes, sizeof(FILE *));
  97. for(j = 0; j < classes; ++j){
  98. char buff[1024];
  99. snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]);
  100. fps[j] = fopen(buff, "w");
  101. }
  102. int m = plist->size;
  103. int i=0;
  104. int t;
  105. float thresh = .001;
  106. int nms = 1;
  107. float iou_thresh = .5;
  108. int nthreads = 8;
  109. image *val = calloc(nthreads, sizeof(image));
  110. image *val_resized = calloc(nthreads, sizeof(image));
  111. image *buf = calloc(nthreads, sizeof(image));
  112. image *buf_resized = calloc(nthreads, sizeof(image));
  113. pthread_t *thr = calloc(nthreads, sizeof(pthread_t));
  114. load_args args = {0};
  115. args.w = net->w;
  116. args.h = net->h;
  117. args.type = IMAGE_DATA;
  118. for(t = 0; t < nthreads; ++t){
  119. args.path = paths[i+t];
  120. args.im = &buf[t];
  121. args.resized = &buf_resized[t];
  122. thr[t] = load_data_in_thread(args);
  123. }
  124. time_t start = time(0);
  125. for(i = nthreads; i < m+nthreads; i += nthreads){
  126. fprintf(stderr, "%d\n", i);
  127. for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
  128. pthread_join(thr[t], 0);
  129. val[t] = buf[t];
  130. val_resized[t] = buf_resized[t];
  131. }
  132. for(t = 0; t < nthreads && i+t < m; ++t){
  133. args.path = paths[i+t];
  134. args.im = &buf[t];
  135. args.resized = &buf_resized[t];
  136. thr[t] = load_data_in_thread(args);
  137. }
  138. for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
  139. char *path = paths[i+t-nthreads];
  140. char *id = basecfg(path);
  141. float *X = val_resized[t].data;
  142. network_predict(net, X);
  143. int w = val[t].w;
  144. int h = val[t].h;
  145. int nboxes = 0;
  146. detection *dets = get_network_boxes(net, w, h, thresh, 0, 0, 0, &nboxes);
  147. if (nms) do_nms_sort(dets, l.side*l.side*l.n, classes, iou_thresh);
  148. print_yolo_detections(fps, id, l.side*l.side*l.n, classes, w, h, dets);
  149. free_detections(dets, nboxes);
  150. free(id);
  151. free_image(val[t]);
  152. free_image(val_resized[t]);
  153. }
  154. }
  155. fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start));
  156. }
  157. void validate_yolo_recall(char *cfg, char *weights)
  158. {
  159. network *net = load_network(cfg, weights, 0);
  160. set_batch_network(net, 1);
  161. fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
  162. srand(time(0));
  163. char *base = "results/comp4_det_test_";
  164. list *plist = get_paths("data/voc.2007.test");
  165. char **paths = (char **)list_to_array(plist);
  166. layer l = net->layers[net->n-1];
  167. int classes = l.classes;
  168. int side = l.side;
  169. int j, k;
  170. FILE **fps = calloc(classes, sizeof(FILE *));
  171. for(j = 0; j < classes; ++j){
  172. char buff[1024];
  173. snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]);
  174. fps[j] = fopen(buff, "w");
  175. }
  176. int m = plist->size;
  177. int i=0;
  178. float thresh = .001;
  179. float iou_thresh = .5;
  180. float nms = 0;
  181. int total = 0;
  182. int correct = 0;
  183. int proposals = 0;
  184. float avg_iou = 0;
  185. for(i = 0; i < m; ++i){
  186. char *path = paths[i];
  187. image orig = load_image_color(path, 0, 0);
  188. image sized = resize_image(orig, net->w, net->h);
  189. char *id = basecfg(path);
  190. network_predict(net, sized.data);
  191. int nboxes = 0;
  192. detection *dets = get_network_boxes(net, orig.w, orig.h, thresh, 0, 0, 1, &nboxes);
  193. if (nms) do_nms_obj(dets, side*side*l.n, 1, nms);
  194. char labelpath[4096];
  195. find_replace(path, "images", "labels", labelpath);
  196. find_replace(labelpath, "JPEGImages", "labels", labelpath);
  197. find_replace(labelpath, ".jpg", ".txt", labelpath);
  198. find_replace(labelpath, ".JPEG", ".txt", labelpath);
  199. int num_labels = 0;
  200. box_label *truth = read_boxes(labelpath, &num_labels);
  201. for(k = 0; k < side*side*l.n; ++k){
  202. if(dets[k].objectness > thresh){
  203. ++proposals;
  204. }
  205. }
  206. for (j = 0; j < num_labels; ++j) {
  207. ++total;
  208. box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h};
  209. float best_iou = 0;
  210. for(k = 0; k < side*side*l.n; ++k){
  211. float iou = box_iou(dets[k].bbox, t);
  212. if(dets[k].objectness > thresh && iou > best_iou){
  213. best_iou = iou;
  214. }
  215. }
  216. avg_iou += best_iou;
  217. if(best_iou > iou_thresh){
  218. ++correct;
  219. }
  220. }
  221. fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total);
  222. free_detections(dets, nboxes);
  223. free(id);
  224. free_image(orig);
  225. free_image(sized);
  226. }
  227. }
  228. void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
  229. {
  230. image **alphabet = load_alphabet();
  231. network *net = load_network(cfgfile, weightfile, 0);
  232. layer l = net->layers[net->n-1];
  233. set_batch_network(net, 1);
  234. srand(2222222);
  235. clock_t time;
  236. char buff[256];
  237. char *input = buff;
  238. float nms=.4;
  239. while(1){
  240. if(filename){
  241. strncpy(input, filename, 256);
  242. } else {
  243. printf("Enter Image Path: ");
  244. fflush(stdout);
  245. input = fgets(input, 256, stdin);
  246. if(!input) return;
  247. strtok(input, "\n");
  248. }
  249. image im = load_image_color(input,0,0);
  250. image sized = resize_image(im, net->w, net->h);
  251. float *X = sized.data;
  252. time=clock();
  253. network_predict(net, X);
  254. printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
  255. int nboxes = 0;
  256. detection *dets = get_network_boxes(net, 1, 1, thresh, 0, 0, 0, &nboxes);
  257. if (nms) do_nms_sort(dets, l.side*l.side*l.n, l.classes, nms);
  258. draw_detections(im, dets, l.side*l.side*l.n, thresh, voc_names, alphabet, 20);
  259. save_image(im, "predictions");
  260. show_image(im, "predictions", 0);
  261. free_detections(dets, nboxes);
  262. free_image(im);
  263. free_image(sized);
  264. if (filename) break;
  265. }
  266. }
  267. void run_yolo(int argc, char **argv)
  268. {
  269. char *prefix = find_char_arg(argc, argv, "-prefix", 0);
  270. float thresh = find_float_arg(argc, argv, "-thresh", .2);
  271. int cam_index = find_int_arg(argc, argv, "-c", 0);
  272. int frame_skip = find_int_arg(argc, argv, "-s", 0);
  273. if(argc < 4){
  274. fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
  275. return;
  276. }
  277. int avg = find_int_arg(argc, argv, "-avg", 1);
  278. char *cfg = argv[3];
  279. char *weights = (argc > 4) ? argv[4] : 0;
  280. char *filename = (argc > 5) ? argv[5]: 0;
  281. if(0==strcmp(argv[2], "test")) test_yolo(cfg, weights, filename, thresh);
  282. else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights);
  283. else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights);
  284. else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights);
  285. else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix, avg, .5, 0,0,0,0);
  286. }