/* attention.c */
  #include "darknet.h"

  #include <assert.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <sys/time.h>
  4. void extend_data_truth(data *d, int n, float val)
  5. {
  6. int i, j;
  7. for(i = 0; i < d->y.rows; ++i){
  8. d->y.vals[i] = realloc(d->y.vals[i], (d->y.cols+n)*sizeof(float));
  9. for(j = 0; j < n; ++j){
  10. d->y.vals[i][d->y.cols + j] = val;
  11. }
  12. }
  13. d->y.cols += n;
  14. }
  15. matrix network_loss_data(network *net, data test)
  16. {
  17. int i,b;
  18. int k = 1;
  19. matrix pred = make_matrix(test.X.rows, k);
  20. float *X = calloc(net->batch*test.X.cols, sizeof(float));
  21. float *y = calloc(net->batch*test.y.cols, sizeof(float));
  22. for(i = 0; i < test.X.rows; i += net->batch){
  23. for(b = 0; b < net->batch; ++b){
  24. if(i+b == test.X.rows) break;
  25. memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float));
  26. memcpy(y+b*test.y.cols, test.y.vals[i+b], test.y.cols*sizeof(float));
  27. }
  28. network orig = *net;
  29. net->input = X;
  30. net->truth = y;
  31. net->train = 0;
  32. net->delta = 0;
  33. forward_network(net);
  34. *net = orig;
  35. float *delta = net->layers[net->n-1].output;
  36. for(b = 0; b < net->batch; ++b){
  37. if(i+b == test.X.rows) break;
  38. int t = max_index(y + b*test.y.cols, 1000);
  39. float err = sum_array(delta + b*net->outputs, net->outputs);
  40. pred.vals[i+b][0] = -err;
  41. //pred.vals[i+b][0] = 1-delta[b*net->outputs + t];
  42. }
  43. }
  44. free(X);
  45. free(y);
  46. return pred;
  47. }
  48. void train_attention(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
  49. {
  50. int i, j;
  51. float avg_cls_loss = -1;
  52. float avg_att_loss = -1;
  53. char *base = basecfg(cfgfile);
  54. printf("%s\n", base);
  55. printf("%d\n", ngpus);
  56. network **nets = calloc(ngpus, sizeof(network*));
  57. srand(time(0));
  58. int seed = rand();
  59. for(i = 0; i < ngpus; ++i){
  60. srand(seed);
  61. #ifdef GPU
  62. cuda_set_device(gpus[i]);
  63. #endif
  64. nets[i] = load_network(cfgfile, weightfile, clear);
  65. nets[i]->learning_rate *= ngpus;
  66. }
  67. srand(time(0));
  68. network *net = nets[0];
  69. int imgs = net->batch * net->subdivisions * ngpus;
  70. printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
  71. list *options = read_data_cfg(datacfg);
  72. char *backup_directory = option_find_str(options, "backup", "/backup/");
  73. char *label_list = option_find_str(options, "labels", "data/labels.list");
  74. char *train_list = option_find_str(options, "train", "data/train.list");
  75. int classes = option_find_int(options, "classes", 2);
  76. char **labels = get_labels(label_list);
  77. list *plist = get_paths(train_list);
  78. char **paths = (char **)list_to_array(plist);
  79. printf("%d\n", plist->size);
  80. int N = plist->size;
  81. double time;
  82. int divs=3;
  83. int size=2;
  84. load_args args = {0};
  85. args.w = divs*net->w/size;
  86. args.h = divs*net->h/size;
  87. args.size = divs*net->w/size;
  88. args.threads = 32;
  89. args.hierarchy = net->hierarchy;
  90. args.min = net->min_ratio*args.w;
  91. args.max = net->max_ratio*args.w;
  92. args.angle = net->angle;
  93. args.aspect = net->aspect;
  94. args.exposure = net->exposure;
  95. args.saturation = net->saturation;
  96. args.hue = net->hue;
  97. args.paths = paths;
  98. args.classes = classes;
  99. args.n = imgs;
  100. args.m = N;
  101. args.labels = labels;
  102. args.type = CLASSIFICATION_DATA;
  103. data train;
  104. data buffer;
  105. pthread_t load_thread;
  106. args.d = &buffer;
  107. load_thread = load_data(args);
  108. int epoch = (*net->seen)/N;
  109. while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
  110. time = what_time_is_it_now();
  111. pthread_join(load_thread, 0);
  112. train = buffer;
  113. load_thread = load_data(args);
  114. data resized = resize_data(train, net->w, net->h);
  115. extend_data_truth(&resized, divs*divs, 0);
  116. data *tiles = tile_data(train, divs, size);
  117. printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);
  118. time = what_time_is_it_now();
  119. float aloss = 0;
  120. float closs = 0;
  121. int z;
  122. for (i = 0; i < divs*divs/ngpus; ++i) {
  123. #pragma omp parallel for
  124. for(j = 0; j < ngpus; ++j){
  125. int index = i*ngpus + j;
  126. extend_data_truth(tiles+index, divs*divs, SECRET_NUM);
  127. matrix deltas = network_loss_data(nets[j], tiles[index]);
  128. for(z = 0; z < resized.y.rows; ++z){
  129. resized.y.vals[z][train.y.cols + index] = deltas.vals[z][0];
  130. }
  131. free_matrix(deltas);
  132. }
  133. }
  134. int *inds = calloc(resized.y.rows, sizeof(int));
  135. for(z = 0; z < resized.y.rows; ++z){
  136. int index = max_index(resized.y.vals[z] + train.y.cols, divs*divs);
  137. inds[z] = index;
  138. for(i = 0; i < divs*divs; ++i){
  139. resized.y.vals[z][train.y.cols + i] = (i == index)? 1 : 0;
  140. }
  141. }
  142. data best = select_data(tiles, inds);
  143. free(inds);
  144. #ifdef GPU
  145. if (ngpus == 1) {
  146. closs = train_network(net, best);
  147. } else {
  148. closs = train_networks(nets, ngpus, best, 4);
  149. }
  150. #endif
  151. for (i = 0; i < divs*divs; ++i) {
  152. printf("%.2f ", resized.y.vals[0][train.y.cols + i]);
  153. if((i+1)%divs == 0) printf("\n");
  154. free_data(tiles[i]);
  155. }
  156. free_data(best);
  157. printf("\n");
  158. image im = float_to_image(64,64,3,resized.X.vals[0]);
  159. //show_image(im, "orig");
  160. //cvWaitKey(100);
  161. /*
  162. image im1 = float_to_image(64,64,3,tiles[i].X.vals[0]);
  163. image im2 = float_to_image(64,64,3,resized.X.vals[0]);
  164. show_image(im1, "tile");
  165. show_image(im2, "res");
  166. */
  167. #ifdef GPU
  168. if (ngpus == 1) {
  169. aloss = train_network(net, resized);
  170. } else {
  171. aloss = train_networks(nets, ngpus, resized, 4);
  172. }
  173. #endif
  174. for(i = 0; i < divs*divs; ++i){
  175. printf("%f ", nets[0]->output[1000 + i]);
  176. if ((i+1) % divs == 0) printf("\n");
  177. }
  178. printf("\n");
  179. free_data(resized);
  180. free_data(train);
  181. if(avg_cls_loss == -1) avg_cls_loss = closs;
  182. if(avg_att_loss == -1) avg_att_loss = aloss;
  183. avg_cls_loss = avg_cls_loss*.9 + closs*.1;
  184. avg_att_loss = avg_att_loss*.9 + aloss*.1;
  185. printf("%ld, %.3f: Att: %f, %f avg, Class: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, aloss, avg_att_loss, closs, avg_cls_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen);
  186. if(*net->seen/N > epoch){
  187. epoch = *net->seen/N;
  188. char buff[256];
  189. sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
  190. save_weights(net, buff);
  191. }
  192. if(get_current_batch(net)%1000 == 0){
  193. char buff[256];
  194. sprintf(buff, "%s/%s.backup",backup_directory,base);
  195. save_weights(net, buff);
  196. }
  197. }
  198. char buff[256];
  199. sprintf(buff, "%s/%s.weights", backup_directory, base);
  200. save_weights(net, buff);
  201. pthread_join(load_thread, 0);
  202. free_network(net);
  203. free_ptrs((void**)labels, classes);
  204. free_ptrs((void**)paths, plist->size);
  205. free_list(plist);
  206. free(base);
  207. }
  208. void validate_attention_single(char *datacfg, char *filename, char *weightfile)
  209. {
  210. int i, j;
  211. network *net = load_network(filename, weightfile, 0);
  212. set_batch_network(net, 1);
  213. srand(time(0));
  214. list *options = read_data_cfg(datacfg);
  215. char *label_list = option_find_str(options, "labels", "data/labels.list");
  216. char *leaf_list = option_find_str(options, "leaves", 0);
  217. if(leaf_list) change_leaves(net->hierarchy, leaf_list);
  218. char *valid_list = option_find_str(options, "valid", "data/train.list");
  219. int classes = option_find_int(options, "classes", 2);
  220. int topk = option_find_int(options, "top", 1);
  221. char **labels = get_labels(label_list);
  222. list *plist = get_paths(valid_list);
  223. char **paths = (char **)list_to_array(plist);
  224. int m = plist->size;
  225. free_list(plist);
  226. float avg_acc = 0;
  227. float avg_topk = 0;
  228. int *indexes = calloc(topk, sizeof(int));
  229. int divs = 4;
  230. int size = 2;
  231. int extra = 0;
  232. float *avgs = calloc(classes, sizeof(float));
  233. int *inds = calloc(divs*divs, sizeof(int));
  234. for(i = 0; i < m; ++i){
  235. int class = -1;
  236. char *path = paths[i];
  237. for(j = 0; j < classes; ++j){
  238. if(strstr(path, labels[j])){
  239. class = j;
  240. break;
  241. }
  242. }
  243. image im = load_image_color(paths[i], 0, 0);
  244. image resized = resize_min(im, net->w*divs/size);
  245. image crop = crop_image(resized, (resized.w - net->w*divs/size)/2, (resized.h - net->h*divs/size)/2, net->w*divs/size, net->h*divs/size);
  246. image rcrop = resize_image(crop, net->w, net->h);
  247. //show_image(im, "orig");
  248. //show_image(crop, "cropped");
  249. //cvWaitKey(0);
  250. float *pred = network_predict(net, rcrop.data);
  251. //pred[classes + 56] = 0;
  252. for(j = 0; j < divs*divs; ++j){
  253. printf("%.2f ", pred[classes + j]);
  254. if((j+1)%divs == 0) printf("\n");
  255. }
  256. printf("\n");
  257. copy_cpu(classes, pred, 1, avgs, 1);
  258. top_k(pred + classes, divs*divs, divs*divs, inds);
  259. show_image(crop, "crop");
  260. for(j = 0; j < extra; ++j){
  261. int index = inds[j];
  262. int row = index / divs;
  263. int col = index % divs;
  264. int y = row * crop.h / divs - (net->h - crop.h/divs)/2;
  265. int x = col * crop.w / divs - (net->w - crop.w/divs)/2;
  266. printf("%d %d %d %d\n", row, col, y, x);
  267. image tile = crop_image(crop, x, y, net->w, net->h);
  268. float *pred = network_predict(net, tile.data);
  269. axpy_cpu(classes, 1., pred, 1, avgs, 1);
  270. show_image(tile, "tile");
  271. //cvWaitKey(10);
  272. }
  273. if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1);
  274. if(rcrop.data != resized.data) free_image(rcrop);
  275. if(resized.data != im.data) free_image(resized);
  276. free_image(im);
  277. free_image(crop);
  278. top_k(pred, classes, topk, indexes);
  279. if(indexes[0] == class) avg_acc += 1;
  280. for(j = 0; j < topk; ++j){
  281. if(indexes[j] == class) avg_topk += 1;
  282. }
  283. printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1));
  284. }
  285. }
  286. void validate_attention_multi(char *datacfg, char *filename, char *weightfile)
  287. {
  288. int i, j;
  289. network *net = load_network(filename, weightfile, 0);
  290. set_batch_network(net, 1);
  291. srand(time(0));
  292. list *options = read_data_cfg(datacfg);
  293. char *label_list = option_find_str(options, "labels", "data/labels.list");
  294. char *valid_list = option_find_str(options, "valid", "data/train.list");
  295. int classes = option_find_int(options, "classes", 2);
  296. int topk = option_find_int(options, "top", 1);
  297. char **labels = get_labels(label_list);
  298. list *plist = get_paths(valid_list);
  299. int scales[] = {224, 288, 320, 352, 384};
  300. int nscales = sizeof(scales)/sizeof(scales[0]);
  301. char **paths = (char **)list_to_array(plist);
  302. int m = plist->size;
  303. free_list(plist);
  304. float avg_acc = 0;
  305. float avg_topk = 0;
  306. int *indexes = calloc(topk, sizeof(int));
  307. for(i = 0; i < m; ++i){
  308. int class = -1;
  309. char *path = paths[i];
  310. for(j = 0; j < classes; ++j){
  311. if(strstr(path, labels[j])){
  312. class = j;
  313. break;
  314. }
  315. }
  316. float *pred = calloc(classes, sizeof(float));
  317. image im = load_image_color(paths[i], 0, 0);
  318. for(j = 0; j < nscales; ++j){
  319. image r = resize_min(im, scales[j]);
  320. resize_network(net, r.w, r.h);
  321. float *p = network_predict(net, r.data);
  322. if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1 , 1);
  323. axpy_cpu(classes, 1, p, 1, pred, 1);
  324. flip_image(r);
  325. p = network_predict(net, r.data);
  326. axpy_cpu(classes, 1, p, 1, pred, 1);
  327. if(r.data != im.data) free_image(r);
  328. }
  329. free_image(im);
  330. top_k(pred, classes, topk, indexes);
  331. free(pred);
  332. if(indexes[0] == class) avg_acc += 1;
  333. for(j = 0; j < topk; ++j){
  334. if(indexes[j] == class) avg_topk += 1;
  335. }
  336. printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1));
  337. }
  338. }
  339. void predict_attention(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top)
  340. {
  341. network *net = load_network(cfgfile, weightfile, 0);
  342. set_batch_network(net, 1);
  343. srand(2222222);
  344. list *options = read_data_cfg(datacfg);
  345. char *name_list = option_find_str(options, "names", 0);
  346. if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list");
  347. if(top == 0) top = option_find_int(options, "top", 1);
  348. int i = 0;
  349. char **names = get_labels(name_list);
  350. clock_t time;
  351. int *indexes = calloc(top, sizeof(int));
  352. char buff[256];
  353. char *input = buff;
  354. while(1){
  355. if(filename){
  356. strncpy(input, filename, 256);
  357. }else{
  358. printf("Enter Image Path: ");
  359. fflush(stdout);
  360. input = fgets(input, 256, stdin);
  361. if(!input) return;
  362. strtok(input, "\n");
  363. }
  364. image im = load_image_color(input, 0, 0);
  365. image r = letterbox_image(im, net->w, net->h);
  366. //resize_network(&net, r.w, r.h);
  367. //printf("%d %d\n", r.w, r.h);
  368. float *X = r.data;
  369. time=clock();
  370. float *predictions = network_predict(net, X);
  371. if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1);
  372. top_k(predictions, net->outputs, top, indexes);
  373. fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time));
  374. for(i = 0; i < top; ++i){
  375. int index = indexes[i];
  376. //if(net->hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net->hierarchy->parent[index] >= 0) ? names[net->hierarchy->parent[index]] : "Root");
  377. //else printf("%s: %f\n",names[index], predictions[index]);
  378. printf("%5.2f%%: %s\n", predictions[index]*100, names[index]);
  379. }
  380. if(r.data != im.data) free_image(r);
  381. free_image(im);
  382. if (filename) break;
  383. }
  384. }
  385. void run_attention(int argc, char **argv)
  386. {
  387. if(argc < 4){
  388. fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
  389. return;
  390. }
  391. char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
  392. int ngpus;
  393. int *gpus = read_intlist(gpu_list, &ngpus, gpu_index);
  394. int top = find_int_arg(argc, argv, "-t", 0);
  395. int clear = find_arg(argc, argv, "-clear");
  396. char *data = argv[3];
  397. char *cfg = argv[4];
  398. char *weights = (argc > 5) ? argv[5] : 0;
  399. char *filename = (argc > 6) ? argv[6]: 0;
  400. char *layer_s = (argc > 7) ? argv[7]: 0;
  401. if(0==strcmp(argv[2], "predict")) predict_attention(data, cfg, weights, filename, top);
  402. else if(0==strcmp(argv[2], "train")) train_attention(data, cfg, weights, gpus, ngpus, clear);
  403. else if(0==strcmp(argv[2], "valid")) validate_attention_single(data, cfg, weights);
  404. else if(0==strcmp(argv[2], "validmulti")) validate_attention_multi(data, cfg, weights);
  405. }