author     Kevin Zhao    2024-05-11 11:12:31 -0400
committer  Kevin Zhao    2024-05-11 11:12:31 -0400
commit     0d8a8409f9aae14b9413c749146bc265bcd21106 (patch)
tree       597c0abd1e930992dfea2ae981a0f88458d06e13
parent     f2e199949f69d3a7c08939a63f70c7a2514fd29f (diff)
More flexible grid sizes (non-square, and not a multiple of 80); calibrated vs. ground-truth diff plot in comments; 90 kbps
-rw-r--r--  decoder.py           53
-rw-r--r--  decoding_utils.py   131
-rw-r--r--  encoder.py           12
3 files changed, 84 insertions, 112 deletions
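
Before this commit, version 1 sized the corner patches per axis (15% of height and width) behind an assert that height * 3 and width * 3 are divisible by 80, and the decoder additionally assumed a square input crop (assert orig_w == orig_h, with input_crop_size fixed at 1024). After it, a single square corner size is derived from the larger dimension and the crop assumptions are gone. A minimal before/after sketch of just the sizing rule (the function names and example dimensions are illustrative, not from the repo):

def old_corner_size(height: int, width: int) -> tuple[int, int]:
    # Old rule: per-axis 15% patches, only defined for a narrow set of sizes.
    assert height * 3 % 80 == 0 and width * 3 % 80 == 0
    return int(height * 0.15), int(width * 0.15)

def new_corner_size(height: int, width: int) -> int:
    # New rule: one square corner, 16% of the larger dimension.
    return int(max(height, width) * 0.16)

print(old_corner_size(480, 640))  # (72, 96): 480*3 and 640*3 are both divisible by 80
print(new_corner_size(500, 700))  # 112: a 500x700 grid would have tripped the old assert
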
diff --git a/decoder.py b/decoder.py
--- a/decoder.py
+++ b/decoder.py
@@ -7,7 +7,6 @@ import torch
 from creedsolo import RSCodec
 from raptorq import Decoder
-from corner_training.models import QuantizedV2, QuantizedV5
 from decoding_utils import localize_corners_wrapper
 
 parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
@@ -25,9 +24,7 @@ args = parser.parse_args()
 
 if args.version == 0:
     cheight = cwidth = max(args.height // 10, args.width // 10)
 elif args.version == 1:
-    assert args.height * 3 % 80 == args.width * 3 % 80 == 0
-    cheight = int(args.height * 0.15)
-    cwidth = int(args.width * 0.15)
+    cheight = cwidth = int(max(args.height, args.width) * 0.16)
 else:
     raise NotImplementedError
@@ -39,8 +36,6 @@ rs_bytes = frame_bytes - (frame_bytes + 254) // 255 * int(args.level * 255) - 4
 rsc = RSCodec(int(args.level * 255))
 decoder = Decoder.with_defaults(args.size, rs_bytes)
 
-input_crop_size = 1024
-
 if args.version == 0:
     def find_corner(A, f):
         cx, cy = A.shape[:2]
@@ -92,25 +87,20 @@ if args.version == 0:
         return frame, (wcol, rcol, gcol, bcol)
 elif args.version == 1:
-    localize_corners = localize_corners_wrapper(args, input_crop_size)
+    localize_corners = localize_corners_wrapper(args)
 
 # ####
-# vid_frames = []
-# # cap = cv2.VideoCapture("/Users/kevinzhao/Downloads/IMG_0994.MOV")
-# cap = cv2.VideoCapture("vid_tiny_v1.mkv")
+# gtruth_frames = []
+# cap = cv2.VideoCapture("vid_mid_v1.mkv")
 # data = None
-# start_time = time.time()
 # while data is None:
 #     ret, raw_frame = cap.read()
 #     if not ret:
 #         print("End of stream")
 #         break
-#     vid_frames.append(raw_frame)
-# gtruth = cv2.cvtColor(vid_frames[0], cv2.COLOR_BGR2RGB)
+#     gtruth_frames.append(cv2.cvtColor(raw_frame, cv2.COLOR_BGR2RGB))
 # ####
-
-
 if args.input.isdecimal():
     args.input = int(args.input)
 cap = cv2.VideoCapture(args.input)
@@ -119,7 +109,6 @@ start_time = time.time()
 while data is None:
     try:
         ret, raw_frame = cap.read()
-        # raw_frame = cv2.resize(raw_frame, (1024, 1024), interpolation=cv2.INTER_NEAREST)  # TODO: remove
         if not ret:
             print("End of stream")
             break
@@ -131,12 +120,13 @@ while data is None:
             X, Y = raw_frame.shape[:2]
             raw_frame = raw_frame[X // 4: 3 * X // 4, Y // 4: 3 * Y // 4]
         elif args.version == 1:
-            h, w, _ = raw_frame.shape
-            raw_frame = raw_frame[(h - input_crop_size) // 2:-(h - input_crop_size) // 2,  # TODO: put back
-                                  (w - input_crop_size) // 2:-(w - input_crop_size) // 2]
+            pass
+            # h, w, _ = raw_frame.shape
+            # raw_frame = raw_frame[(h - input_crop_size) // 2:-(h - input_crop_size) // 2,
+            #                       (w - input_crop_size) // 2:-(w - input_crop_size) // 2]
 
-        cv2.imshow("", raw_frame)
-        cv2.waitKey(1)
+        # cv2.imshow("", raw_frame)
+        # cv2.waitKey(1)
 
         raw_frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2RGB)
         frame, (wcol, rcol, gcol, bcol) = localize_corners(raw_frame)
@@ -151,21 +141,30 @@ while data is None:
 
         # Convert to new color space
         # calibrated_frame = (np.squeeze(F @ (frame - origin)[..., np.newaxis]) >= 192).astype(np.uint8)
-        # calibrated_frame = (np.squeeze(F @ (frame - origin)[..., np.newaxis]) >= 192).astype(np.uint8) * 255
-        calibrated_frame = (np.squeeze(F @ (frame - origin)[..., np.newaxis]) >= 150).astype(np.uint8) * 255
+        calibrated_frame = (np.squeeze(F @ (frame - origin)[..., np.newaxis]) >= 128).astype(np.uint8)
+
         # fig, axs = plt.subplots(1, 2)
         # axs[0].imshow(frame)
-        # axs[1].imshow(calibrated_frame)
+        # axs[1].imshow(calibrated_frame * 255)
         # plt.show()
-
+        #
+        # closest_ind = None
+        # closest_diff = 1
+        # for i, gtruth_frame in enumerate(gtruth_frames):
+        #     diff = (gtruth_frame != calibrated_frame * 255).any(axis=2).mean()
+        #     if diff < closest_diff:
+        #         closest_ind = i
+        #         closest_diff = diff
+        #
+        # gtruth = gtruth_frames[closest_ind]
         # fig, axs = plt.subplots(1, 2)
-        # correct_mask = np.logical_not((calibrated_frame != gtruth).any(axis=2))
+        # correct_mask = np.logical_not((calibrated_frame * 255 != gtruth).any(axis=2))
         # calibrated_frame_copy = calibrated_frame.copy()
         # gtruth_copy = gtruth.copy()
         # calibrated_frame_copy[correct_mask] = [0, 0, 0]
        # gtruth_copy[correct_mask] = [0, 0, 0]
         # axs[0].imshow(gtruth_copy)
-        # axs[1].imshow(calibrated_frame_copy)
+        # axs[1].imshow(calibrated_frame_copy * 255)
         # plt.show()
 
         calibrated_frame = np.packbits(
diff --git a/decoding_utils.py b/decoding_utils.py
index d3bf2d7..5ad74d2 100644
--- a/decoding_utils.py
+++ b/decoding_utils.py
@@ -18,7 +18,7 @@ from corner_training.utils import get_gaussian_filter, get_bounded_slices
 torch.backends.quantized.engine = 'qnnpack'
 
 
-def localize_corners_wrapper(args, input_crop_size, debug=False):
+def localize_corners_wrapper(args, debug=False):
     stage1_model_checkpt_path = "checkpts/QuantizedV2_Stage1_128_9.pt"
     stage2_model_checkpt_path = "checkpts/QuantizedV5_Stage2_128_9.pt"
@@ -38,27 +38,24 @@ def localize_corners_wrapper(args, debug=False):
     torch.ao.quantization.convert(stage2_model, inplace=True)
     stage2_model.load_state_dict(torch.load(stage2_model_checkpt_path, map_location=torch.device('cpu')))
 
-    stage1_size = 128
-    stage2_size = input_crop_size // 16
-
-    assert stage1_size & 1 == 0, "Assuming even size when dividing into quadrants"
-    assert stage2_size & 1 == 0, "Assuming even size when center cropping"
     stage1_model.eval()
     stage2_model.eval()
 
-    preprocess_img_stage1 = transforms.Compose([
-        transforms.Lambda(lambda img: cv2.resize(img, (stage1_size, stage1_size), interpolation=cv2.INTER_NEAREST)),
+    stage1_size = 128
+    assert stage1_size & 1 == 0, "Assuming even size when dividing into quadrants"
+
+    np_to_fp32_tensor = transforms.Compose([
         transforms.ToImage(),
         transforms.ToDtype(torch.float32, scale=True),
     ])
 
-    gaussian_filter = get_gaussian_filter(4, 4)  # for stage1 NMS heuristic
-
-    preprocess_img_stage2 = transforms.Compose([
-        transforms.ToImage(),
-        transforms.ToDtype(torch.float32, scale=True),
+    preprocess_img_stage1 = transforms.Compose([
+        transforms.Lambda(lambda img: resize_keep_aspect(img, stage1_size)),
+        np_to_fp32_tensor,
     ])
 
+    gaussian_filter = get_gaussian_filter(4, 4)  # for stage1 NMS heuristic
+
     # Transform cropped corners until they all look like top left corners, as that's what the model is trained on
     transforms_by_corner = [
         lambda img: img,  # identity
@@ -75,9 +72,8 @@ def localize_corners_wrapper(args, debug=False):
             cropped_frame: Square numpy array
         """
         orig_h, orig_w, _ = cropped_frame.shape
-        assert orig_w == orig_h, "Assuming square img"
-        assert orig_w % stage1_size == 0
-        upscale_factor = orig_w // stage1_size  # for stage 2
+        stage2_size = max(orig_h, orig_w) // 16
+        upscale_factor = min(orig_w, orig_h) / stage1_size  # for stage 2
 
         start_time = time.time()
         stage1_img = preprocess_img_stage1(cropped_frame)
@@ -93,27 +89,28 @@ def localize_corners_wrapper(args, debug=False):
         if debug:
             print(57, time.time() - start_time)
 
-        quad_size = stage1_size // 2
+        quad_h = stage1_img.size(1) // 2  # might miss 1 pixel on edge if odd
+        quad_w = stage1_img.size(2) // 2
         corners_by_quad = dict()
         for top_half in (0, 1):  # TODO: bot/right to remove all 1 minuses
             for left_half in (0, 1):
-                quad_i_start = quad_size * (1 - top_half)
-                quad_j_start = quad_size * (1 - left_half)
+                quad_i_start = quad_h * (1 - top_half)
+                quad_j_start = quad_w * (1 - left_half)
                 curr_quad_preds = stage1_pred[
-                    quad_i_start: quad_i_start + quad_size,
-                    quad_j_start: quad_j_start + quad_size,
+                    quad_i_start: quad_i_start + quad_h,
+                    quad_j_start: quad_j_start + quad_w,
                 ].clone()
                 max_locs = []
                 for i in range(6):  # expect 4 points, but get top 6 to be safe
                     max_ind = torch.argmax(curr_quad_preds).item()  # TODO: more efficient like segtree, maybe account for neighbors too
-                    max_loc = (max_ind // quad_size, max_ind % quad_size)
+                    max_loc = (max_ind // quad_w, max_ind % quad_w)
                     max_locs.append(max_loc)
                     # TODO: improve, maybe scale Gaussian peak to val of max_loc, probably better to not subtract from a location multiple times
-                    preds_slice, gaussian_slice = get_bounded_slices((quad_size, quad_size), gaussian_filter.size(),
+                    preds_slice, gaussian_slice = get_bounded_slices((quad_h, quad_w), gaussian_filter.size(),
                                                                      *max_loc)
                     curr_quad_preds[preds_slice] -= gaussian_filter[gaussian_slice]
@@ -122,7 +119,7 @@ def localize_corners_wrapper(args, debug=False):
 
                 min_cost = 1e9
                 min_square = None
-                for potential_combo in itertools.combinations(max_locs, 4):  # TODO: don't repeat symmetrical squares
+                for potential_combo in itertools.combinations(max_locs, 4):
                     curr_pts, curr_cost = score_combo(potential_combo)
                     if curr_cost < min_cost:
                         min_cost = curr_cost
@@ -139,7 +136,7 @@ def localize_corners_wrapper(args, debug=False):
 
         outer_corners = []
         # corner_colors = []  # by center, currently rounding to the pixel in the original image
-        origin = (quad_size, quad_size)
+        origin = (quad_h, quad_w)
         for quad in range(4):  # TODO: consistent (x, y) or (i, j)
             outer_corners.append(max((l2_dist(corner, origin), corner) for corner in corners_by_quad[quad])[1])
             # corner_colors.append(cropped_frame[int((sum(corner[0] for corner in corners_by_quad[quad]) / 4 * upscale_factor)),
@@ -152,7 +149,7 @@ def localize_corners_wrapper(args, debug=False):
             for left_half in (0, 1):
                 corner_ind = top_half * 2 + left_half
                 y, x = outer_corners[corner_ind]
-                upscaled_y, upscaled_x = y * upscale_factor, x * upscale_factor
+                upscaled_y, upscaled_x = round(y * upscale_factor), round(x * upscale_factor)
 
                 top = max(0, upscaled_y - stage2_size // 2)
                 bottom = min(orig_h, upscaled_y + stage2_size // 2)
@@ -164,7 +161,7 @@ def localize_corners_wrapper(args, debug=False):
                 corner_padding[(1 - top_half) * 2 + 1] = stage2_size - (bottom - top)
                 corner_padding[(1 - left_half) * 2] = stage2_size - (right - left)
                 cropped_corner_img = transforms_f.pad(  # TODO: don't pad since that should speed up inference
-                    preprocess_img_stage2(cropped_frame[top:bottom, left:right]),
+                    np_to_fp32_tensor(cropped_frame[top:bottom, left:right]),
                     corner_padding
                 )
                 stage2_imgs.append(cropped_corner_img)
@@ -195,8 +192,8 @@ def localize_corners_wrapper(args, debug=False):
         if debug:
             print(137, time.time() - start_time)
 
-        orig_pred_pts = [(orig_x * upscale_factor + stage2_pred_x - stage2_size // 2,
-                          orig_y * upscale_factor + stage2_pred_y - stage2_size // 2)
+        orig_pred_pts = [(round(orig_x * upscale_factor) + stage2_pred_x - stage2_size // 2,
+                          round(orig_y * upscale_factor) + stage2_pred_y - stage2_size // 2)
                          for (orig_y, orig_x), (stage2_pred_x, stage2_pred_y) in zip(outer_corners, stage2_pred_pts)]
 
         if debug:
@@ -206,56 +203,18 @@ def localize_corners_wrapper(args, debug=False):
         # plt.scatter(np.array(orig_pred_pts).T[0], np.array(orig_pred_pts).T[1])
         # plt.show()
 
-        cheight = int(args.height * 0.15)
-        cwidth = int(args.width * 0.15)
-        cch = int(args.height * 0.15) // 4
-        ccw = int(args.width * 0.15) // 4
-
-        # plt.imshow(cropped_frame)
-        # plt.show()
+        corner_size = int(max(args.height, args.width) * 0.16)
+        qtr_corner_size = corner_size // 4
 
         grid_coords = np.float32([
-            [ccw, cch],
-            [args.width - ccw, cch],
-            [ccw, args.height - cch],
-            [args.width - ccw, args.height - cch],
+            [qtr_corner_size, qtr_corner_size],
+            [args.width - qtr_corner_size, qtr_corner_size],
+            [qtr_corner_size, args.height - qtr_corner_size],
+            [args.width - qtr_corner_size, args.height - qtr_corner_size],
         ])
         grid_coords -= 1/2
-        #
-        # grid_coords *= orig_w / args.width
-        # torch_frame = transforms_f.perspective(
-        #     transforms.Compose([
-        #         # transforms.Lambda(
-        #         #     lambda img: cv2.resize(img, (stage1_size, stage1_size), interpolation=cv2.INTER_NEAREST)),
-        #         transforms.ToImage(),
-        #         transforms.ToDtype(torch.float32, scale=True),
-        #     ])(cropped_frame),
-        #     orig_pred_pts,
-        #     grid_coords,
-        # )
-        #
-        # torch_frame = cv2.resize(torch_frame.permute(1, 2, 0).numpy(), (args.width, args.height), interpolation=cv2.INTER_AREA)
-        # # torch_frame = cv2.resize(torch_frame.permute(1, 2, 0).numpy(), (args.width, args.height), interpolation=cv2.INTER_NEAREST)
-        #
-        # # torch_frame = transforms_f.resize(torch_frame, [args.height, args.width]).permute(1, 2, 0).numpy()
-        # # torch_frame = torch_frame.permute(1, 2, 0).numpy()
-        # cropped_frame = (torch_frame * 255).astype(np.uint8)
-        # plt.imshow(cropped_frame)
-        #
-        # plt.axis("off")
-        # plt.show()
-
-
-
-        # grid_coords = np.float32([
-        #     [ccw, cch],
-        #     [args.width - ccw - 1, cch],
-        #     [ccw, args.height - cch - 1],
-        #     [args.width - ccw - 1, args.height - cch - 1],
-        # ])
-
         M = cv2.getPerspectiveTransform(
             np.float32(orig_pred_pts),
             grid_coords,
@@ -267,14 +226,14 @@ def localize_corners_wrapper(args, debug=False):
         padding = math.ceil(max(args.height, args.width) / 80)  # arbitrary
 
         # guessing wildly on +/- 1s
-        white_sq = cropped_frame[cch + padding: cheight - cch - padding,
-                                 ccw + padding: cwidth - ccw - padding]
-        red_sq = cropped_frame[cch + padding: cheight - cch - padding,
-                               args.width - cwidth + ccw + padding: args.width - ccw - padding]
-        green_sq = cropped_frame[args.height - cheight + cch + padding: args.height - cch - padding,
-                                 ccw + padding: cwidth - ccw - padding]
-        blue_sq = cropped_frame[args.height - cheight + cch + padding: args.height - cch - padding,
-                                args.width - cwidth + ccw + padding: args.width - ccw - padding]
+        white_sq = cropped_frame[qtr_corner_size + padding: corner_size - qtr_corner_size - padding,
+                                 qtr_corner_size + padding: corner_size - qtr_corner_size - padding]
+        red_sq = cropped_frame[qtr_corner_size + padding: corner_size - qtr_corner_size - padding,
+                               args.width - corner_size + qtr_corner_size + padding: args.width - qtr_corner_size - padding]
+        green_sq = cropped_frame[args.height - corner_size + qtr_corner_size + padding: args.height - qtr_corner_size - padding,
+                                 qtr_corner_size + padding: corner_size - qtr_corner_size - padding]
+        blue_sq = cropped_frame[args.height - corner_size + qtr_corner_size + padding: args.height - qtr_corner_size - padding,
+                                args.width - corner_size + qtr_corner_size + padding: args.width - qtr_corner_size - padding]
         corner_colors = [white_sq.mean(axis=(0, 1)), red_sq.mean(axis=(0, 1)),
                          green_sq.mean(axis=(0, 1)), blue_sq.mean(axis=(0, 1))]
@@ -323,6 +282,16 @@ def score_combo(combo):
     return hull, (max(side_lens) - min(side_lens)) / min(side_lens)
 
 
+def resize_keep_aspect(img: np.ndarray, min_len: int) -> np.ndarray:
+    h, w, _ = img.shape
+    if h < w:
+        output_size = (round(min_len * w / h), min_len)
+    else:
+        output_size = (min_len, round(min_len * h / w))
+
+    return cv2.resize(img, output_size, interpolation=cv2.INTER_NEAREST)
+
+
 # Gift wrapping code, adapted from GeeksForGeeks.
 # "This code is contributed by Akarsh Somani, IIIT Kalyani"
 class Point:
diff --git a/encoder.py b/encoder.py
--- a/encoder.py
+++ b/encoder.py
@@ -20,10 +20,12 @@ args = parser.parse_args()
 
 if args.version == 0:
     cheight = cwidth = max(args.height // 10, args.width // 10)
 elif args.version == 1:
-    # cell borders are 0.0375% of width/height
-    assert args.height * 3 % 80 == args.width * 3 % 80 == 0  # TODO: less strict better ratio
-    cheight = int(args.height * 0.15)
-    cwidth = int(args.width * 0.15)
+    # # cell borders are 0.0375% of width/height
+    # assert args.height * 3 % 80 == args.width * 3 % 80 == 0  # TODO: less strict better ratio
+    # cheight = int(args.height * 0.15)
+    # cwidth = int(args.width * 0.15)
+
+    cheight = cwidth = int(max(args.height, args.width) * 0.16)
 else:
     raise NotImplementedError
@@ -74,6 +76,8 @@ def mkframe(packet):
     frame = np.unpackbits(frame)
     # Pad to be multiple of 3 so we can reshape into RGB channels
     frame = np.pad(frame, (0, (3 - len(frame)) % 3))
+    print(frame_size)
+    print(frame.shape)
     frame = np.reshape(frame, (frame_size, 3))
     frame = np.concatenate(
         (
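
The non-square support rides on the new resize_keep_aspect helper at the bottom of decoding_utils.py: it pins the shorter side to min_len and scales the longer side by the same factor, replacing the old unconditional 128x128 square resize (cv2.resize takes its target size in (width, height) order, hence the swapped tuples). A quick usage sketch, assuming a hypothetical 720x1280 camera frame:

import numpy as np
from decoding_utils import resize_keep_aspect

frame = np.zeros((720, 1280, 3), dtype=np.uint8)  # hypothetical 16:9 frame
small = resize_keep_aspect(frame, 128)  # shorter side pinned to stage1_size
print(small.shape)  # (128, 228, 3): height becomes 128, width keeps the aspect ratio
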