Finding violations in video using computer vision

Automating video processing to detect violations is one of the most in-demand applications of computer vision across many industries.

Today we will try to detect, from video, that a client was absent from the frame at the moment an operation was recorded in the automated system.





We have at our disposal surveillance camera footage of the employee's workplace and a log of operations.





To detect a person in the frame we will use the MobileNet neural network together with the CSRT Tracker from opencv. To read the timestamp off the frame we will use Tesseract-OCR.





We will take a pretrained MobileNet. It detects 20 classes of objects, and the one we need is the person. The model files can be downloaded from GitHub.





First, install the packages that provide cv2 and pytesseract:





!pip install opencv-python
!pip install pytesseract



Keep in mind that pytesseract is only a wrapper: it requires the Tesseract-OCR engine itself to be installed on your machine.
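Alternatively, instead of extending PATH (as we do below), pytesseract can be pointed straight at the executable; a minimal sketch, with an example path for a typical Windows install:

import pytesseract

# example location; adjust to wherever Tesseract is installed on your machine
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'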









Import the libraries and add the Tesseract-OCR location to PATH:





import os

video_path = ... # path to the video file
tesseract_path = ... # path to the Tesseract-OCR installation
os.environ["PATH"] += os.pathsep + tesseract_path
import pytesseract
import cv2
import imutils
import pandas as pd
import datetime as dt



Next, declare the variables we will need: a DataFrame that will store, for each frame, the timestamp and a present/absent flag, plus the client's work area, the current timestamp, and a tracking flag:





df = pd.DataFrame(columns = ['date', 'client_in_place'])
work_place = () # coordinates of the client's work area, set on the first frame
date = None # timestamp recognized from the frame
tracked = False # whether a person is currently being tracked



Specify the paths to the pretrained MobileNet SSD files: the network architecture and the weights:





prototxt = 'MobileNetSSD_deploy.prototxt' # network architecture
weights = 'MobileNetSSD_deploy.caffemodel' # pretrained weights



The model is trained to detect the following 20 classes of objects:





classNames = {0: 'background',
              1: 'aeroplane',
              2: 'bicycle',
              3: 'bird',
              4: 'boat',
              5: 'bottle',
              6: 'bus',
              7: 'car',
              8: 'cat',
              9: 'chair',
              10: 'cow',
              11: 'diningtable',
              12: 'dog',
              13: 'horse',
              14: 'motorbike',
              15: 'person',
              16: 'pottedplant',
              17: 'sheep',
              18: 'sofa',
              19: 'train',
              20: 'tvmonitor'}



Set the confidence threshold above which a detection is accepted.





thr = 0.1 # confidence threshold



Load the model:





net = cv2.dnn.readNetFromCaffe(prototxt, weights) # load the Caffe model



Open the video with cv2.VideoCapture:





cap = cv2.VideoCapture(video_path)



To turn frame numbers into timestamps we need the frame rate. Read the video frame by frame with .read(), count the total number of frames, and divide by the video duration in seconds:





%%time

cap = cv2.VideoCapture(video_path)
total_frame = 0
while True:
    success, frame = cap.read()
    if success:
        total_frame += 1
    else:
        break
video_length = ... # video duration in seconds
fps = round(total_frame / video_length)
fps



This is not the fastest approach: counting the frames of a 100-minute video took about 2 minutes.
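A faster alternative is to ask OpenCV for the frame rate and frame count stored in the container metadata; the values are not guaranteed to be accurate for every file, which is why counting frames by hand is the safer option:

cap = cv2.VideoCapture(video_path)
fps_meta = cap.get(cv2.CAP_PROP_FPS) # frame rate declared in the file metadata
frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT) # declared number of frames
print(round(fps_meta), int(frame_count))
cap.release()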









Now the main loop. We read the video frame by frame; on the first frame the user marks the client's work area and the timestamp region. Then we detect a person with MobileNet, follow them with the CSRT tracker, and record whether they are inside the client's area.





while cap.isOpened():
    ret, frame = cap.read()

    if ret:

        frame = imutils.resize(frame, width=1200) # resize so the frame fits on screen and is processed faster

        # on the first frame, ask the user to mark the client's work area
        if len(work_place) == 0:
            cv2.putText(frame, 'Set the client\'s location', (0, 90), cv2.FONT_HERSHEY_SIMPLEX, 
                2, (0,255,0), 2)
            work_place = cv2.selectROI('frame', frame, fromCenter=False, showCrosshair=True)
            x, y, w, h = [int(coord) for coord in work_place]

        # recognize the timestamp overlaid on the frame
        if not date:
            try:
                cv2.putText(frame, 'Set the date', (0, 160), cv2.FONT_HERSHEY_SIMPLEX, 
                    2, (0,255,0), 2)
                date = cv2.selectROI('frame', frame, fromCenter=False, showCrosshair=True)
                date_x, date_y, date_w, date_h = [int(coord) for coord in date]
                date_ = frame[date_y : date_y+date_h, date_x : date_x+date_w]
                date_ = cv2.cvtColor(date_, cv2.COLOR_BGR2GRAY) # convert to grayscale
                #date_ = cv2.threshold(date_, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
                date_ = cv2.threshold(date_, 180, 255, 0)[1] # binarize: keep only the bright overlay text
                date = pytesseract.image_to_string(date_)
                date = dt.datetime.strptime(date, '%Y-%m-%d %H:%M:%S')

            except Exception:
                print('Could not recognize the date, enter it manually as YYYY-MM-DD HH:MM:SS')
                date_ = input()
                date = dt.datetime.strptime(date_, '%Y-%m-%d %H:%M:%S')

        # cap.get(1) is the current frame number; every fps frames, one second has passed
        if cap.get(1) % fps == 0:
            date += dt.timedelta(seconds = 1)

        # run detection on the first frame, after losing the person, and every 30 seconds
        if not tracked or (cap.get(1) % (fps * 30) == 0):

            # prepare the frame for the network
            frame_resized = cv2.resize(frame, (300, 300)) # the network expects 300x300 input
            blob = cv2.dnn.blobFromImage(frame_resized, 0.007843, 
                                         (300,300), (127.5, 127.5, 127.5), False)

            # pass the blob through the network
            net.setInput(blob)
            detections = net.forward() 
            #[0, 0, object, [0, class_id, confidence, xLeftBottom, yLeftBottom, xRightTop, yRightTop]]

            # dimensions of the resized frame
            cols = frame_resized.shape[1]
            rows = frame_resized.shape[0]

            # loop over detections and keep the sufficiently confident ones
            for obj in detections[0,0, :, :]:
                confidence = obj[2]
                if confidence > thr:

                    class_id = int(obj[1])
                    if class_id == 15: # class 15 is 'person'

                        xLeftBottom = int(obj[3] * cols)
                        yLeftBottom = int(obj[4] * rows)
                        xRightTop   = int(obj[5] * cols)
                        yRightTop   = int(obj[6] * rows)

                        # scale factors from the 300x300 input back to the displayed frame
                        heightFactor = frame.shape[0] / 300.0
                        widthFactor = frame.shape[1] / 300.0

                        # rescale the box coordinates
                        xLeftBottom = int(widthFactor * xLeftBottom)
                        yLeftBottom = int(heightFactor * yLeftBottom)
                        xRightTop   = int(widthFactor * xRightTop)
                        yRightTop   = int(heightFactor * yRightTop)

                        # center of the bounding box
                        xCenter = xLeftBottom + (xRightTop - xLeftBottom)/2
                        yCenter = yLeftBottom + (yRightTop - yLeftBottom)/2

                        # check whether the person's center is inside the client's area
                        if xCenter < x + w and yCenter < y + h and xCenter > x and yCenter > y:
                            tracker = cv2.TrackerCSRT_create()
                            tracker.init(frame, (xLeftBottom, yLeftBottom, xRightTop-xLeftBottom, yRightTop-yLeftBottom))
                            tracked = True
                            cv2.rectangle(frame, (xLeftBottom,yLeftBottom), (xRightTop,yRightTop), (0,255,0), 3, 1)
                            break
                        else:
                            tracked = False
        else:
            _, bbox = tracker.update(frame)
            X, Y, W, H = [int(coord) for coord in bbox]

            xCenter = X + W/2
            yCenter = Y + H/2

            if xCenter < x + w and yCenter < y + h and xCenter > x and yCenter > y:

                tracked = True
                cv2.rectangle(frame, (X,Y), (X + W, Y + H), (255,255,0), 3, 1)
            else:
                tracked = False

        cv2.imshow('frame', frame)
        df.loc[cap.get(1), :] = [date, tracked]
        print(cap.get(1), date, tracked) # frame number, timestamp, person present/absent
        if cv2.waitKey(1) == 27: #ESC
            break
    else:
        break

cap.release()
cv2.destroyAllWindows()



The .read() method returns two values: a flag indicating whether the read succeeded, and the frame itself. If the frame was read, we shrink it with imutils.resize() to speed up processing; if not, the video has ended and we leave the loop.





On the first frame we also ask the user to select the region with the timestamp, which the camera overlays in the format «YYYY-MM-DD HH:MM:SS». The crop is converted to grayscale and binarized so that only the bright overlay text remains, then passed to Tesseract, and the recognized string is parsed into the date variable.
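Since the timestamp is a single line of text, Tesseract can be nudged with a page segmentation mode. A minimal sketch on a pre-cropped image (the file name timestamp.png is hypothetical):

import cv2
import pytesseract

crop = cv2.imread('timestamp.png') # hypothetical pre-cropped timestamp region
gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY) # grayscale, as in the main loop
binary = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY)[1] # keep only the bright text
# --psm 7 tells Tesseract to treat the image as a single line of text
text = pytesseract.image_to_string(binary, config='--psm 7').strip()
print(text)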





Recognition with Tesseract-OCR does not always succeed. If it fails, we ask the user to type the timestamp manually in the same format and parse it the same way.





The .get() method with argument 1 (CAP_PROP_POS_FRAMES) returns the number of the current frame. Whenever the frame number is a multiple of fps, one more second of video has passed, so we add a second to date. We advance the timestamp ourselves instead of recognizing it on every frame, because running tesseract that often would be far too slow.
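The same arithmetic can be expressed directly: given fps, any frame number maps to an offset from the first recognized timestamp. A small sketch (the helper name is ours, not from the original code):

import datetime as dt

def frame_to_timestamp(start_date, frame_number, fps):
    # each block of fps frames corresponds to one second of video
    return start_date + dt.timedelta(seconds=frame_number // fps)

start = dt.datetime(2021, 1, 1, 12, 0, 0) # hypothetical recognized start time
print(frame_to_timestamp(start, 150, fps=25)) # 2021-01-01 12:00:06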





Detection works as follows: the frame is shrunk to 300×300 with cv2.resize() and converted into a blob with cv2.dnn.blobFromImage(). The blob is passed to the network, and the forward pass returns detections, an array that holds, for every detected object, its class id among the 20 classes, a confidence score, and its bounding-box coordinates.





We are only interested in class 15, the person. For every sufficiently confident detection of a person, the box coordinates are scaled back to the original frame size and its center is computed. If the center falls inside the client's work area, tracked is set to True, a CSRT tracker is initialized on the bounding box, and the box is drawn on the frame; otherwise tracked becomes False. On every frame, the current date and the value of tracked are appended to df.
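The containment test used here, and again in the tracking branch, is a plain point-in-rectangle check. A small helper (hypothetical, not part of the original code) makes the intent explicit:

def center_in_roi(box, roi):
    # box is (x1, y1, x2, y2); roi is (x, y, w, h) as returned by cv2.selectROI
    x1, y1, x2, y2 = box
    rx, ry, rw, rh = roi
    cx = x1 + (x2 - x1) / 2
    cy = y1 + (y2 - y1) / 2
    return rx < cx < rx + rw and ry < cy < ry + rh

print(center_in_roi((100, 100, 200, 200), (0, 0, 300, 300))) # True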





Running the neural network on every frame would be expensive, so while a person is being tracked we only update the tracker and check whether the center of its box is still inside the work area, setting tracked to True or False accordingly. As soon as tracking is lost, and additionally every 30 seconds, detection is run again.
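For reference, the tracker's lifecycle in isolation (a minimal sketch; the initial box is made up, and in some OpenCV builds the constructor lives under cv2.legacy):

import cv2

cap = cv2.VideoCapture(video_path) # reusing the path defined earlier
ok, first_frame = cap.read()

tracker = cv2.TrackerCSRT_create() # or cv2.legacy.TrackerCSRT_create()
tracker.init(first_frame, (50, 50, 100, 200)) # hypothetical (x, y, w, h) of the person

ok, next_frame = cap.read()
success, bbox = tracker.update(next_frame) # success flag and the updated box
print(success, bbox)
cap.release()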





Each processed frame is displayed in a window, and the loop can be interrupted at any time by pressing ESC. When the video ends, we release the capture and close all windows.





As a result, df contains a timestamp and a present/absent flag for every frame. Several frames fall within the same second, so we group the records by timestamp and take the maximum of the flag: the client counts as present during a second if they were detected in at least one of its frames.
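On booleans, max acts as a logical OR: the group result is True if the client was detected in any frame of that second. A toy illustration with made-up data (column names match the DataFrame declared earlier):

import datetime as dt
import pandas as pd

toy = pd.DataFrame({'date': [dt.datetime(2021, 1, 1, 12, 0, 0)] * 3,
                    'client_in_place': [False, True, False]})
print(toy.groupby('date', as_index=False).agg(max)) # client_in_place comes out True

The actual aggregation and export: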





df_ = df.groupby('date', as_index=False).agg(max)
df_.to_excel('output.xlsx', index=False)



Finally, it is worth mentioning the drawbacks of this approach:





  • Tracker robustness. The CSRT tracker can lose the person, for example during occlusions or abrupt movement. It is one of the more accurate trackers available in opencv, but it still drifts off the target from time to time.





  • Speed. Processing is slow, which matters on long recordings.





  • Detection quality. MobileNet was trained mostly on images of standing people, so it does not always «see» a seated client, which is why the confidence threshold had to be set so low.





The first and second problems can be solved by trackers based on deep learning. For example, the GOTURN tracker is implemented in the opencv library, but for its operation you need to download additional files. You can also use the popular tracker Re3 or the recently introduced AcurusTrack. The third problem can be solved by replacing the neural network and/or retraining it on seated people.
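For instance, GOTURN expects its model files next to the script; a minimal sketch, assuming goturn.prototxt and goturn.caffemodel have been downloaded into the working directory:

import cv2

tracker = cv2.TrackerGOTURN_create() # fails if the model files are missing
cap = cv2.VideoCapture(video_path) # reusing the path defined earlier
ok, frame = cap.read()
tracker.init(frame, (50, 50, 100, 200)) # hypothetical initial box
ok, frame = cap.read()
success, bbox = tracker.update(frame)
print(success, bbox)
cap.release()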





Link to the code.







