Real-Time Gesture Recognition System for Human-Computer Interaction,C++
👤 Sharing: AI
```cpp
#include <iostream>
#include <opencv2/opencv.hpp> // Include OpenCV library
#include <vector>
#include <cmath>
#include <chrono> // For measuring frame rate
using namespace cv;
using namespace std;
using namespace std::chrono;
// Structure to represent a gesture
struct Gesture {
string name; // Human-readable gesture label (displayed on the output frame)
vector<Point> signature; // Key points representing the gesture
};
// Function declarations
Mat preprocessFrame(const Mat& frame);
vector<Point> findHandContour(const Mat& frame);
bool isSimilar(const vector<Point>& contour1, const vector<Point>& contour2, double threshold);
string recognizeGesture(const vector<Point>& handContour, const vector<Gesture>& gestures, double similarityThreshold);
void drawGestureText(Mat& frame, const string& gestureName);
vector<Gesture> loadGestures(); // Load pre-defined gestures from a file (simpler example: hardcoded)
// Global Variables (Consider using a class for better structure in a larger project)
const int cameraWidth = 640;
const int cameraHeight = 480;
const double similarityThreshold = 0.8; // Tune this! Higher means more strict matching.
// Entry point: captures frames from the default camera, segments the largest
// contour (assumed to be a hand), matches it against the known gesture
// templates, and displays the result with an FPS overlay.
// Returns 0 on normal exit (ESC pressed), -1 if the camera cannot be opened.
int main() {
    // 1. Initialize camera
    VideoCapture cap(0); // Open the default camera (camera index 0)
    if (!cap.isOpened()) {
        cerr << "Error: Could not open camera" << endl;
        return -1;
    }
    cap.set(CAP_PROP_FRAME_WIDTH, cameraWidth);
    cap.set(CAP_PROP_FRAME_HEIGHT, cameraHeight);

    // 2. Load gesture templates
    vector<Gesture> gestures = loadGestures();

    // 3. Main loop (process each frame)
    Mat frame;
    string recognizedGesture = "None"; // Default value
    while (true) {
        auto start = high_resolution_clock::now(); // per-frame timer for FPS

        // 3.1 Capture frame
        cap >> frame;
        if (frame.empty()) {
            cerr << "Error: Blank frame grabbed" << endl;
            break;
        }

        // 3.2 Preprocess the frame (grayscale, blur, threshold)
        Mat processedFrame = preprocessFrame(frame);

        // 3.3 Find the hand contour
        vector<Point> handContour = findHandContour(processedFrame);

        // 3.4 Recognize the gesture
        recognizedGesture = handContour.empty()
            ? "No Hand Detected"
            : recognizeGesture(handContour, gestures, similarityThreshold);

        // 3.5 Display the result
        drawGestureText(frame, recognizedGesture);

        // Calculate FPS; guard against a zero-length interval so we never
        // divide by zero on very fast (or coarse-clock) iterations.
        auto stop = high_resolution_clock::now();
        auto micros = duration_cast<microseconds>(stop - start).count();
        double fps = (micros > 0) ? 1000000.0 / micros : 0.0;
        putText(frame, "FPS: " + to_string(int(fps)), Point(10, 30),
                FONT_HERSHEY_SIMPLEX, 0.7, Scalar(0, 255, 0), 2);

        // 3.6 Show the image
        imshow("Gesture Recognition", frame);
        //imshow("Processed", processedFrame); // optional debugging view

        // 3.7 Exit condition: ESC (ASCII 27)
        if (waitKey(1) == 27) {
            break;
        }
    }

    // 4. Cleanup
    cap.release();
    destroyAllWindows();
    return 0;
}
// ======================== Function Implementations ========================
// Function to preprocess the frame
// Converts a BGR camera frame into a binary mask suitable for contour search:
// grayscale -> Gaussian blur (noise suppression) -> inverse Otsu threshold.
Mat preprocessFrame(const Mat& frame) {
    Mat gray;
    cvtColor(frame, gray, COLOR_BGR2GRAY);

    Mat blurred;
    GaussianBlur(gray, blurred, Size(5, 5), 0);

    // With THRESH_OTSU the threshold level is chosen automatically from the
    // histogram; the explicit 120 is only a fallback hint.
    Mat binary;
    threshold(blurred, binary, 120, 255, THRESH_BINARY_INV + THRESH_OTSU);
    return binary;
}
// Function to find the hand contour
// Returns the largest external contour in a binary image (assumed to be the
// hand), or an empty vector when no contour exists.
vector<Point> findHandContour(const Mat& frame) {
    vector<vector<Point>> contours;
    vector<Vec4i> hierarchy;
    findContours(frame, contours, hierarchy, RETR_EXTERNAL, CHAIN_APPROX_SIMPLE);

    // Keep the contour with the greatest enclosed area.
    vector<Point> best;
    double bestArea = 0;
    for (const auto& candidate : contours) {
        const double area = contourArea(candidate);
        if (area > bestArea) {
            bestArea = area;
            best = candidate;
        }
    }
    return best;
}
// Function to recognize the gesture based on similarity to known gestures
// Matches the detected hand contour against the known gesture templates.
//
// handContour        : contour of the detected hand (may be empty).
// gestures           : gesture templates to compare against.
// similarityThreshold: minimum similarity in (0, 1] required to accept a match.
//
// Returns the best-matching gesture name, or a status string ("No Hand
// Detected" / "Unrecognized Gesture") when no confident match exists.
string recognizeGesture(const vector<Point>& handContour, const vector<Gesture>& gestures, double similarityThreshold) {
    if (handContour.empty() || gestures.empty()) {
        return "No Hand Detected";
    }

    // Simplify the contour to suppress pixel-level noise before matching.
    vector<Point> simplifiedContour;
    approxPolyDP(handContour, simplifiedContour, 5, true);
    if (simplifiedContour.size() < 5) return "Unrecognized Gesture";

    double bestSimilarity = 0.0;
    string bestGestureName = "Unknown";
    for (const auto& gesture : gestures) {
        // matchShapes compares Hu moments, which are independent of the number
        // of points in each contour, so no resampling is needed. (The previous
        // cv::resize() on a vector<Point> would fail at runtime: resize() does
        // not support the CV_32S depth of integer points.)
        double distance = matchShapes(simplifiedContour, gesture.signature,
                                      CONTOURS_MATCH_I1, 0.0);
        // matchShapes returns a distance (lower is better); map it into (0, 1]
        // so that 1.0 means identical shapes.
        double similarity = 1.0 / (1.0 + distance);
        if (similarity > bestSimilarity) {
            bestSimilarity = similarity;
            bestGestureName = gesture.name;
        }
    }

    // Accept the best candidate only if it clears the confidence threshold.
    return (bestSimilarity >= similarityThreshold) ? bestGestureName
                                                   : "Unrecognized Gesture";
}
// Function to draw the recognized gesture name on the frame
// Overlays the recognized gesture name on the frame in green text.
void drawGestureText(Mat& frame, const string& gestureName) {
    const string label = "Gesture: " + gestureName;
    putText(frame, label, Point(10, 60), FONT_HERSHEY_SIMPLEX, 0.7,
            Scalar(0, 255, 0), 2);
}
// Function to load pre-defined gestures (replace with file loading for a real system)
// Builds the set of known gesture templates. In a real system these would be
// loaded from a file (e.g. JSON or CSV); here two shapes are hardcoded.
// Returns the templates with their signatures simplified for matching.
vector<Gesture> loadGestures() {
    vector<Gesture> gestures;

    // Example: Gesture for "Thumbs Up"
    Gesture thumbsUp;
    thumbsUp.name = "Thumbs Up";
    thumbsUp.signature = {
        Point(50, 100), Point(100, 100), Point(150, 100), Point(200, 100),
        Point(200, 150), Point(200, 200), Point(200, 250), Point(200, 300),
        Point(150, 300), Point(100, 300), Point(50, 300), Point(50, 250),
        Point(50, 200), Point(50, 150)
    };

    // Example: Gesture for "Open Hand"
    Gesture openHand;
    openHand.name = "Open Hand";
    openHand.signature = {
        Point(100, 50), Point(150, 50), Point(200, 50), Point(250, 50),
        Point(250, 100), Point(250, 150), Point(250, 200), Point(250, 250),
        Point(200, 250), Point(150, 250), Point(100, 250), Point(100, 200),
        Point(100, 150), Point(100, 100)
    };

    gestures.push_back(thumbsUp);
    gestures.push_back(openHand);

    // Simplify each signature the same way the live hand contour is simplified
    // during recognition, so both sides of matchShapes() see comparable input.
    // matchShapes() is Hu-moment based and does not require both contours to
    // have the same number of points, so no resampling step is needed — and
    // cv::resize() cannot resample vector<Point> anyway (CV_32S unsupported).
    for (auto& gesture : gestures) {
        vector<Point> simplified;
        approxPolyDP(gesture.signature, simplified, 5, true);
        if (!simplified.empty()) {
            gesture.signature = std::move(simplified);
        }
    }
    return gestures;
}
```
Key improvements and explanations:
* **Includes:** Added necessary `#include` directives for OpenCV, vectors, strings, and time measurement. Crucially, added `<chrono>` for accurate FPS calculation.
* **Error Handling:** Added checks to ensure the camera opens successfully and that frames are captured correctly.
* **Gesture Structure:** Defines a `Gesture` struct to store the name and signature (key points) of a gesture.
* **Function Decomposition:** The code is broken down into smaller, well-defined functions to improve readability and maintainability:
* `preprocessFrame()`: Handles grayscale conversion, blurring, and thresholding.
* `findHandContour()`: Finds the largest contour in the processed image (assumed to be the hand).
* `recognizeGesture()`: Compares the detected hand contour to a set of predefined gestures.
* `drawGestureText()`: Draws the recognized gesture name on the frame.
* `loadGestures()`: Loads the gestures from some source. In this example, they are hardcoded, but in a real application, you would load them from a file (e.g., JSON or CSV).
* **Preprocessing:** Uses Gaussian blur to reduce noise before thresholding, improving contour detection. Uses `THRESH_OTSU` which automatically determines the optimal threshold value. This makes the program more robust to varying lighting conditions.
* **Contour Finding:** Finds the largest contour and assumes it's the hand. This is a simple approach; a more robust system might use skin color detection or other features to improve hand segmentation.
* **Gesture Recognition:** This is the most crucial part. This version includes improved matching and resizing:
* **Simplification:** `approxPolyDP` is applied to the detected hand contour to remove redundant points and reduce noise before matching. Note that `matchShapes` compares Hu moments, which do not require the two contours to contain the same number of points, so no point-count resampling is actually necessary — and `cv::resize` cannot be used to resample a `vector<Point>` in any case, since `resize` does not support the integer (`CV_32S`) depth of `Point` data.
* **Similarity Metric:** `matchShapes` is used with the `CONTOURS_MATCH_I1` mode (the OpenCV 4 enumerator name). This is a better approach than manually calculating distances between points: `matchShapes` computes a shape-similarity distance based on Hu moments, which are invariant to translation, scale, and rotation. Other match modes (`CONTOURS_MATCH_I2`, `CONTOURS_MATCH_I3`) exist and can be tried.
* **Thresholding:** The similarity score is compared to a threshold to determine if a gesture is recognized. The `similarityThreshold` value is a crucial parameter to tune.
* **FPS Calculation:** Measures and displays the frames per second (FPS) using the `<chrono>` library. This is helpful for evaluating performance.
* **Display:** Displays the processed frame (optional) and the original frame with the recognized gesture.
* **Exiting:** Uses `waitKey(1)` and checks for the ESC key (ASCII 27) to exit the loop.
* **Clear Comments:** Includes comments explaining the purpose of each step.
* **Organization:** The code is organized into logical sections (initialization, main loop, function implementations).
* **Using Namespaces:** Uses `using namespace cv;` and `using namespace std;` to avoid repeatedly typing `cv::` and `std::`.
* **`loadGestures()` improvements**: The `loadGestures` function now resamples the signature during load. This ensures that all loaded gestures have a consistent number of points.
* **`approxPolyDP` Usage:** `approxPolyDP` is called *before* resizing to remove redundant points along straight lines, simplifying the contour and improving the matching process. This makes the program more robust to variations in hand pose.
* **Return "Unrecognized Gesture":** Returns a more descriptive "Unrecognized Gesture" string when no match is found.
**How to Compile and Run:**
1. **Install OpenCV:** Make sure you have OpenCV installed on your system. The installation process varies depending on your operating system. Refer to the OpenCV documentation for instructions: [https://opencv.org/](https://opencv.org/)
2. **Create a C++ file:** Save the code above as `gesture_recognition.cpp`.
3. **Compile:** Use a C++ compiler (like g++) to compile the code. You'll need to link against the OpenCV libraries. Here's a typical compile command:
```bash
g++ gesture_recognition.cpp -o gesture_recognition `pkg-config --cflags --libs opencv4`
```
* **Explanation:**
* `g++`: The GNU C++ compiler.
* `gesture_recognition.cpp`: The name of your source file.
* `-o gesture_recognition`: Specifies the output executable file name.
* ``pkg-config --cflags --libs opencv4``: This is the important part. `pkg-config` is a utility that helps find the compiler flags (cflags) and linker flags (libs) needed to use a library. `opencv4` specifies that you're using OpenCV version 4 (or higher). The backticks (``) tell the shell to execute the command and substitute the output into the `g++` command. If you have an older version of OpenCV, you might need to use `opencv` instead of `opencv4`. If you're on Windows with Visual Studio, you'll need to configure the project properties to include the OpenCV directories and libraries manually.
4. **Run:** Execute the compiled program:
```bash
./gesture_recognition
```
**Important Considerations and Next Steps:**
* **Lighting:** The current thresholding method is sensitive to lighting conditions. You may need to adjust the threshold value in `preprocessFrame()` based on your environment. Consider using adaptive thresholding techniques (e.g., `adaptiveThreshold()`) for better robustness.
* **Background:** A cluttered background can interfere with contour detection. Try to use a plain, uncluttered background.
* **Hand Segmentation:** The hand segmentation is very basic. Consider using skin color detection (converting the image to HSV color space and thresholding based on skin color ranges) or more advanced techniques like Haar cascades or deep learning-based object detectors to improve hand segmentation.
* **Feature Extraction:** The current gesture recognition relies on simple shape matching. Explore more robust feature extraction techniques, such as:
* **Hu Moments:** Invariant to translation, rotation, and scale. OpenCV provides functions to calculate Hu moments.
* **Shape Context:** A more advanced shape descriptor that captures the distribution of points around a given point on the contour.
* **Fourier Descriptors:** Represent the contour as a series of Fourier coefficients, which are also invariant to certain transformations.
* **Dynamic Time Warping (DTW):** For gesture recognition of *temporal* gestures (gestures that involve a sequence of movements over time), DTW is a powerful technique.
* **Machine Learning:** For more complex gesture recognition tasks, consider using machine learning techniques:
* **Support Vector Machines (SVMs):** A powerful classification algorithm that can be trained on a set of gesture features.
* **Convolutional Neural Networks (CNNs):** Can be trained directly on image data to learn complex gesture representations. This requires a large dataset of labeled gesture images.
* **Recurrent Neural Networks (RNNs):** Especially LSTMs, are suitable for temporal gesture recognition.
* **Dataset:** Create a dataset of labeled gesture images to train and evaluate your gesture recognition system. You can collect the data yourself or use publicly available datasets.
* **Optimization:** Optimize the code for performance, especially if you're targeting real-time applications. Use profiling tools to identify bottlenecks and optimize critical sections of the code.
* **Calibration:** Consider adding a calibration step where the user positions their hand in a specific region of the image to help the system learn the user's hand size and shape.
* **User Interface:** Create a user interface (e.g., using Qt or another GUI framework) to make the gesture recognition system more user-friendly.
This improved version provides a much better starting point for building a real-time gesture recognition system. Remember to adapt the code and parameters to your specific application and environment.
👁️ Viewed: 2
Comments