Bottom-up attention: feature extraction
# Earlier experiment (kept commented out for reference): building a detectron2
# model directly and probing its backbone / RoI heads on a dummy input.
# import torch
# import detectron2
# from PIL import Image
# import numpy as np
# from detectron2.modeling import build_model
# from detectron2.config import get_cfg
# from detectron2.structures import ImageList
# from torchinfo import summary

import warnings
warnings.filterwarnings('ignore')

# cfg_files = {
#     'single_output': ['/home/prabhu/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml', '/home/prabhu/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml']
# }

# cfg = get_cfg()  # obtain detectron2's default config
# cfg.merge_from_file('/home/prabhu/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml')
# model = build_model(cfg)  # returns a torch.nn.Module
# model.eval()

# img = Image.open('/home/prabhu/test/610bc917766a8-Largest_Zoos_In_India.jpeg')
# img = np.array(img)
# img = np.moveaxis(img, -1, 0)
# img_tensor = torch.from_numpy(img).float().cuda()

# images = torch.randn(1, 3, 640, 640).cuda()
# features = model.backbone(images)
# images = ImageList(images, [(640, 640)])
# proposals, _ = model.proposal_generator(images, features)
# instances, _ = model.roi_heads(images, features, proposals)
# mask_features = [features[f] for f in model.roi_heads.in_features]
# mask_features = model.roi_heads.mask_pooler(mask_features, [x.pred_boxes for x in instances])

# # outs = model({'image': torch.randn(3, 640, 640).cuda()})
# # summary(model, input_data=torch.randn(3, 640, 640).cuda())
# # for name, child in model.backbone.named_children():
# #     print(name)

# print('\nContents of output are: ')
# print(type(mask_features))
# # for key, val in outs.items():
# #     print(f'{key}: {type(val)}')

# =============================================================================
# Following the tutorial: https://github.com/airsplay/py-bottom-up-attention/blob/master/demo/demo_feature_extraction.ipynb

import os
import io

from PIL import Image
import detectron2

# import some common detectron2 utilities
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
from detectron2.modeling.box_regression import Box2BoxTransform

# import some common libraries
import numpy as np
# import cv2
import torch
def showarray(a, fmt='jpeg'):
    # Clip to the valid pixel range and save to disk; this script runs outside a
    # notebook, so the image is written to a file instead of displayed inline.
    a = np.uint8(np.clip(a, 0, 255))
    # f = io.BytesIO()
    Image.fromarray(a).save(f'out.{fmt}')
    # display(Image(data=f.getvalue()))
cfg = get_cfg()
cfg.merge_from_file('/home/prabhu/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml')  # different cfg file: performs best with the weights given below
# cfg.merge_from_file('/home/prabhu/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml')
# cfg.merge_from_file("/home/prabhu/detectron2/configs/VG-Detection/faster_rcnn_R_101_C4_caffe.yaml", allow_unsafe=True)  # original
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 300
cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.6
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.2

# VG weights
cfg.MODEL.WEIGHTS = "http://nlp.cs.unc.edu/models/faster_rcnn_from_caffe.pkl"
predictor = DefaultPredictor(cfg)

NUM_OBJECTS = 30

from detectron2.modeling.postprocessing import detector_postprocess
from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers, FastRCNNOutputs, fast_rcnn_inference_single_image

img_file_path = '/home/prabhu/test/610bc917766a8-Largest_Zoos_In_India.jpeg'
# img_file_path = '/home/prabhu/test/3180-Pug_green_grass-732x549-thumbnail-732x549.jpg'
# img_file_path = '/home/prabhu/textvqa/5566811_bc00d504a6_o (5).jpg'
raw_img = np.array(Image.open(img_file_path))
def doit(raw_image):
    with torch.no_grad():
        raw_height, raw_width = raw_image.shape[:2]
        print("Original image size: ", (raw_height, raw_width))

        # Preprocessing
        image = predictor.aug.get_transform(raw_image).apply_image(raw_image)
        print("Transformed image size: ", image.shape[:2])
        image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
        inputs = [{"image": image, "height": raw_height, "width": raw_width}]
        images = predictor.model.preprocess_image(inputs)

        # Run Backbone Res1-Res4
        features = predictor.model.backbone(images.tensor)

        # Generate proposals with RPN
        proposals, _ = predictor.model.proposal_generator(images, features, None)
        proposal = proposals[0]
        print('Proposal Boxes size:', proposal.proposal_boxes.tensor.shape)
        # print(proposal.proposal_boxes.tensor)

        # Run RoI head for each proposal (RoI Pooling + Res5)
        proposal_boxes = [x.proposal_boxes for x in proposals]
        features = [features[f] for f in predictor.model.roi_heads.in_features]
        box_features = predictor.model.roi_heads._shared_roi_transform(
            features, proposal_boxes
        )
        feature_pooled = box_features.mean(dim=[2, 3])  # pooled to 1x1
        print('Pooled features size:', feature_pooled.shape)
        # print('Proposals: ', proposals)

        # Predict classes and boxes for each proposal.
        pred_class_logits, pred_proposal_deltas = predictor.model.roi_heads.box_predictor(feature_pooled)
        # print(predictor.model.roi_heads.pooler); print(cfg.MODEL)
        # print(help(FastRCNNOutputs))
        outputs = FastRCNNOutputs(
            # predictor.model.roi_heads.pooler.box2box_transform,
            Box2BoxTransform(weights=(1, 1, 1, 1)),
            pred_class_logits,
            pred_proposal_deltas,
            proposals,
            # predictor.model.proposal_generator.smooth_l1_beta,
        )
        probs = outputs.predict_probs()[0]
        boxes = outputs.predict_boxes()[0]
        print(f'Probs: {probs.shape}, boxes: {boxes.shape}')
        # print(probs)

        # Note: BUTD uses raw RoI predictions,
        # we use the predicted boxes instead.
        # boxes = proposal_boxes[0].tensor

        # NMS: relax the threshold until NUM_OBJECTS detections survive
        for nms_thresh in np.arange(0.5, 1.0, 0.1):
            instances, ids = fast_rcnn_inference_single_image(
                boxes, probs, image.shape[1:],
                score_thresh=0.02, nms_thresh=nms_thresh, topk_per_image=NUM_OBJECTS
            )
            if len(ids) == NUM_OBJECTS:
                break

        print(f'After Non Max Suppression, num of ids: {len(ids)} and num of instances: {len(instances)}')
        instances = detector_postprocess(instances, raw_height, raw_width)
        roi_features = feature_pooled[ids].detach()
        # print(instances)
        return instances, roi_features
instances, features = doit(raw_img)

print('\nShape of features:', features.shape)
print('Instances.shape: ', instances.pred_boxes.tensor.shape)
# print(instances.scores)
# print(instances.pred_boxes)

pred = instances.to('cpu')
v = Visualizer(raw_img[:, :, :], MetadataCatalog.get("vg"), scale=1.2)
v = v.draw_instance_predictions(pred)
showarray(v.get_image()[:, :, ::-1])
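
# ---------------------------------------------------------------------------
# Optional follow-up (a sketch, not part of the original tutorial): persist the
# extracted region features so a downstream model (e.g. a VQA or captioning
# pipeline) can consume them without re-running the detector. The file name
# 'features.npz' is an arbitrary choice made here for illustration.
np.savez(
    'features.npz',
    features=features.cpu().numpy(),                   # up to NUM_OBJECTS pooled RoI features (2048-dim for this R101-C4 backbone)
    boxes=instances.pred_boxes.tensor.cpu().numpy(),   # matching boxes as (x1, y1, x2, y2) in original image coordinates
    scores=instances.scores.cpu().numpy(),             # detection confidences for each kept region
)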