rnn_vid.c 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. #include "darknet.h"
  2. #ifdef OPENCV
  3. image get_image_from_stream(CvCapture *cap);
  4. image ipl_to_image(IplImage* src);
  5. void reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters);
  6. typedef struct {
  7. float *x;
  8. float *y;
  9. } float_pair;
  10. float_pair get_rnn_vid_data(network net, char **files, int n, int batch, int steps)
  11. {
  12. int b;
  13. assert(net.batch == steps + 1);
  14. image out_im = get_network_image(net);
  15. int output_size = out_im.w*out_im.h*out_im.c;
  16. printf("%d %d %d\n", out_im.w, out_im.h, out_im.c);
  17. float *feats = calloc(net.batch*batch*output_size, sizeof(float));
  18. for(b = 0; b < batch; ++b){
  19. int input_size = net.w*net.h*net.c;
  20. float *input = calloc(input_size*net.batch, sizeof(float));
  21. char *filename = files[rand()%n];
  22. CvCapture *cap = cvCaptureFromFile(filename);
  23. int frames = cvGetCaptureProperty(cap, CV_CAP_PROP_FRAME_COUNT);
  24. int index = rand() % (frames - steps - 2);
  25. if (frames < (steps + 4)){
  26. --b;
  27. free(input);
  28. continue;
  29. }
  30. printf("frames: %d, index: %d\n", frames, index);
  31. cvSetCaptureProperty(cap, CV_CAP_PROP_POS_FRAMES, index);
  32. int i;
  33. for(i = 0; i < net.batch; ++i){
  34. IplImage* src = cvQueryFrame(cap);
  35. image im = ipl_to_image(src);
  36. rgbgr_image(im);
  37. image re = resize_image(im, net.w, net.h);
  38. //show_image(re, "loaded");
  39. //cvWaitKey(10);
  40. memcpy(input + i*input_size, re.data, input_size*sizeof(float));
  41. free_image(im);
  42. free_image(re);
  43. }
  44. float *output = network_predict(net, input);
  45. free(input);
  46. for(i = 0; i < net.batch; ++i){
  47. memcpy(feats + (b + i*batch)*output_size, output + i*output_size, output_size*sizeof(float));
  48. }
  49. cvReleaseCapture(&cap);
  50. }
  51. //printf("%d %d %d\n", out_im.w, out_im.h, out_im.c);
  52. float_pair p = {0};
  53. p.x = feats;
  54. p.y = feats + output_size*batch; //+ out_im.w*out_im.h*out_im.c;
  55. return p;
  56. }
  57. void train_vid_rnn(char *cfgfile, char *weightfile)
  58. {
  59. char *train_videos = "data/vid/train.txt";
  60. char *backup_directory = "/home/pjreddie/backup/";
  61. srand(time(0));
  62. char *base = basecfg(cfgfile);
  63. printf("%s\n", base);
  64. float avg_loss = -1;
  65. network net = parse_network_cfg(cfgfile);
  66. if(weightfile){
  67. load_weights(&net, weightfile);
  68. }
  69. printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
  70. int imgs = net.batch*net.subdivisions;
  71. int i = *net.seen/imgs;
  72. list *plist = get_paths(train_videos);
  73. int N = plist->size;
  74. char **paths = (char **)list_to_array(plist);
  75. clock_t time;
  76. int steps = net.time_steps;
  77. int batch = net.batch / net.time_steps;
  78. network extractor = parse_network_cfg("cfg/extractor.cfg");
  79. load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv");
  80. while(get_current_batch(net) < net.max_batches){
  81. i += 1;
  82. time=clock();
  83. float_pair p = get_rnn_vid_data(extractor, paths, N, batch, steps);
  84. copy_cpu(net.inputs*net.batch, p.x, 1, net.input, 1);
  85. copy_cpu(net.truths*net.batch, p.y, 1, net.truth, 1);
  86. float loss = train_network_datum(net) / (net.batch);
  87. free(p.x);
  88. if (avg_loss < 0) avg_loss = loss;
  89. avg_loss = avg_loss*.9 + loss*.1;
  90. fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time));
  91. if(i%100==0){
  92. char buff[256];
  93. sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
  94. save_weights(net, buff);
  95. }
  96. if(i%10==0){
  97. char buff[256];
  98. sprintf(buff, "%s/%s.backup", backup_directory, base);
  99. save_weights(net, buff);
  100. }
  101. }
  102. char buff[256];
  103. sprintf(buff, "%s/%s_final.weights", backup_directory, base);
  104. save_weights(net, buff);
  105. }
  106. image save_reconstruction(network net, image *init, float *feat, char *name, int i)
  107. {
  108. image recon;
  109. if (init) {
  110. recon = copy_image(*init);
  111. } else {
  112. recon = make_random_image(net.w, net.h, 3);
  113. }
  114. image update = make_image(net.w, net.h, 3);
  115. reconstruct_picture(net, feat, recon, update, .01, .9, .1, 2, 50);
  116. char buff[256];
  117. sprintf(buff, "%s%d", name, i);
  118. save_image(recon, buff);
  119. free_image(update);
  120. return recon;
  121. }
  122. void generate_vid_rnn(char *cfgfile, char *weightfile)
  123. {
  124. network extractor = parse_network_cfg("cfg/extractor.recon.cfg");
  125. load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv");
  126. network net = parse_network_cfg(cfgfile);
  127. if(weightfile){
  128. load_weights(&net, weightfile);
  129. }
  130. set_batch_network(&extractor, 1);
  131. set_batch_network(&net, 1);
  132. int i;
  133. CvCapture *cap = cvCaptureFromFile("/extra/vid/ILSVRC2015/Data/VID/snippets/val/ILSVRC2015_val_00007030.mp4");
  134. float *feat;
  135. float *next;
  136. image last;
  137. for(i = 0; i < 25; ++i){
  138. image im = get_image_from_stream(cap);
  139. image re = resize_image(im, extractor.w, extractor.h);
  140. feat = network_predict(extractor, re.data);
  141. if(i > 0){
  142. printf("%f %f\n", mean_array(feat, 14*14*512), variance_array(feat, 14*14*512));
  143. printf("%f %f\n", mean_array(next, 14*14*512), variance_array(next, 14*14*512));
  144. printf("%f\n", mse_array(feat, 14*14*512));
  145. axpy_cpu(14*14*512, -1, feat, 1, next, 1);
  146. printf("%f\n", mse_array(next, 14*14*512));
  147. }
  148. next = network_predict(net, feat);
  149. free_image(im);
  150. free_image(save_reconstruction(extractor, 0, feat, "feat", i));
  151. free_image(save_reconstruction(extractor, 0, next, "next", i));
  152. if (i==24) last = copy_image(re);
  153. free_image(re);
  154. }
  155. for(i = 0; i < 30; ++i){
  156. next = network_predict(net, next);
  157. image new = save_reconstruction(extractor, &last, next, "new", i);
  158. free_image(last);
  159. last = new;
  160. }
  161. }
  162. void run_vid_rnn(int argc, char **argv)
  163. {
  164. if(argc < 4){
  165. fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
  166. return;
  167. }
  168. char *cfg = argv[3];
  169. char *weights = (argc > 4) ? argv[4] : 0;
  170. //char *filename = (argc > 5) ? argv[5]: 0;
  171. if(0==strcmp(argv[2], "train")) train_vid_rnn(cfg, weights);
  172. else if(0==strcmp(argv[2], "generate")) generate_vid_rnn(cfg, weights);
  173. }
  174. #else
  175. void run_vid_rnn(int argc, char **argv){}
  176. #endif