Example: vision_bundle_adj_example

This examples demonstrates the function mrpt::vision::bundle_adj_full() with a set of simulated monocular camera observations. See the bundle adjustment module documentation on the C++ API.
This function requires setting MRPT_ALLOW_LGPLV3=ON in CMake when building MRPT.
C++ example source code:
/* +------------------------------------------------------------------------+
   |                     Mobile Robot Programming Toolkit (MRPT)            |
   |                          https://www.mrpt.org/                         |
   |                                                                        |
   | Copyright (c) 2005-2024, Individual contributors, see AUTHORS file     |
   | See: https://www.mrpt.org/Authors - All rights reserved.               |
   | Released under BSD License. See: https://www.mrpt.org/License          |
   +------------------------------------------------------------------------+ */

/* ===========================================================================
    EXAMPLE: bundle_adj_full_demo
    PURPOSE: Demonstrate "mrpt::vision::bundle_adj_full" with a set of
              simulated or real data. If the program is called without command
              line arguments, simulated measurements will be used.
              To use real data, invoke:
                bundle_adj_full_demo  <feats.txt> <cam_model.cfg>

              Where <feats.txt> is a "TSequenceFeatureObservations" saved as
              a text file, and <cam_model.cfg> is a .ini-like file with a
              section named "CAMERA" loadable by mrpt::img::TCamera.

    DATE: 20-Aug-2010
   ===========================================================================
   */

#include <mrpt/config/CConfigFile.h>
#include <mrpt/gui/CDisplayWindow3D.h>
#include <mrpt/gui/CDisplayWindowPlots.h>
#include <mrpt/io/CTextFileLinesParser.h>
#include <mrpt/math/geometry.h>
#include <mrpt/opengl/CGridPlaneXY.h>
#include <mrpt/opengl/CPointCloud.h>
#include <mrpt/opengl/stock_objects.h>
#include <mrpt/poses/CPose3DQuat.h>
#include <mrpt/random.h>
#include <mrpt/system/filesystem.h>
#include <mrpt/vision/bundle_adjustment.h>
#include <mrpt/vision/pinhole.h>

#include <iostream>

using namespace mrpt::literals;  // _deg
using namespace mrpt;
using namespace mrpt::gui;
using namespace mrpt::math;
using namespace mrpt::system;
using namespace mrpt::opengl;
using namespace mrpt::poses;
using namespace mrpt::img;
using namespace mrpt::vision;
using namespace std;

CVectorDouble history_avr_err;

double WORLD_SCALE = 1;  // Will change when loading SBA examples

// A feedback functor, which is called on each iteration by the optimizer to let
// us know on the progress:
void my_BundleAdjustmentFeedbackFunctor(
    const size_t cur_iter, const double cur_total_sq_error,
    const size_t max_iters,
    const TSequenceFeatureObservations& input_observations,
    const TFramePosesVec& current_frame_estimate,
    const TLandmarkLocationsVec& current_landmark_estimate)
{
    const double avr_err =
        std::sqrt(cur_total_sq_error / input_observations.size());
    history_avr_err.push_back(std::log(1e-100 + avr_err));
    if ((cur_iter % 10) == 0)
    {
        cout << "[PROGRESS] Iter: " << cur_iter
             << " avrg err in px: " << avr_err << endl;
        cout.flush();
    }
}

// ------------------------------------------------------
//              bundle_adj_full_demo
// ------------------------------------------------------
void bundle_adj_full_demo(
    const TCamera& camera_params, const TSequenceFeatureObservations& allObs,
    TFramePosesVec& frame_poses, TLandmarkLocationsVec& landmark_points)
{
    cout << "Optimizing " << allObs.size() << " feature observations.\n";

    mrpt::containers::yaml extra_params;
    // extra_params["verbose"] = true;
    extra_params["max_iterations"] = 2000;  // 250;
    // extra_params["num_fix_frames"] = 1;
    // extra_params["num_fix_points"] = 0;
    extra_params["robust_kernel"] = false;
    extra_params["kernel_param"] = 5.0;
    extra_params["profiler"] = true;

    mrpt::vision::bundle_adj_full(
        allObs, camera_params, frame_poses, landmark_points, extra_params,
        &my_BundleAdjustmentFeedbackFunctor);
}
// ---------------------------------------------------------

mrpt::opengl::CSetOfObjects::Ptr framePosesVecVisualize(
    const TFramePosesVec& poses, const double len, const double lineWidth);

// ------------------------------------------------------
//                      MAIN
// ------------------------------------------------------
int main(int argc, char** argv)
{
    try
    {
        // Simulation or real-data? (read at the top of this file):
        if ((argc != 1 && argc != 3 && argc != 4) ||
            (argc == 2 && !strcpy(argv[1], "--help")))
        {
            cout << "Usage:\n"
                 << argv[0] << " --help -> Shows this help\n"
                 << argv[0] << "     -> Simulation\n"
                 << argv[0]
                 << " <feats.txt> <cam_model.cfg> -> Data in MRPT format\n"
                 << argv[0]
                 << " <cams.txt> <points.cfg> <calib.txt> -> SBA format\n";
            return 1;
        }

        // BA data:
        TCamera camera_params;
        TSequenceFeatureObservations allObs;
        TFramePosesVec frame_poses;
        TLandmarkLocationsVec landmark_points;

        // Only for simulation mode:
        TFramePosesVec frame_poses_real,
            frame_poses_noisy;  // Ground truth & starting point
        TLandmarkLocationsVec landmark_points_real,
            landmark_points_noisy;  // Ground truth & starting point

        if (argc == 1)
        {
            random::CRandomGenerator rg(1234);

            //  Simulation
            // --------------------------
            // The projective camera model:
            camera_params.ncols = 800;
            camera_params.nrows = 600;
            camera_params.fx(400);
            camera_params.fy(400);
            camera_params.cx(400);
            camera_params.cy(300);

            //      Generate synthetic dataset:
            // -------------------------------------
            const size_t nPts = 100;  // # of 3D landmarks
            const double L1 =
                60;  // Draw random poses in the rectangle L1xL2xL3
            const double L2 = 10;
            const double L3 = 10;
            const double max_camera_dist = L1 * 4;

            const double cameraPathLen = L1 * 1.2;
            // const double cameraPathEllipRadius1 = L1*2;
            // const double cameraPathEllipRadius2 = L2*2;
            // Noise params:
            const double STD_PX_ERROR = 0.10;  // pixels

            const double STD_PX_ERROR_OUTLIER = 5;  // pixels
            const double PROBABILITY_OUTLIERS = 0;  // 0.01;

            const double STD_PT3D = 0.10;  // meters
            const double STD_CAM_XYZ = 0.05;  // meters
            const double STD_CAM_ANG = 5.0_deg;  // degs

            landmark_points_real.resize(nPts);
            for (size_t i = 0; i < nPts; i++)
            {
                landmark_points_real[i].x = rg.drawUniform(-L1, L1);
                landmark_points_real[i].y = rg.drawUniform(-L2, L2);
                landmark_points_real[i].z = rg.drawUniform(-L3, L3);
            }

            // const double angStep = M_PI*2.0/40;
            const double camPosesSteps = 2 * cameraPathLen / 20;
            frame_poses_real.clear();

            for (double x = -cameraPathLen; x < cameraPathLen;
                 x += camPosesSteps)
            {
                TPose3D p;
                p.x = x;  // cameraPathEllipRadius1 * cos(ang);
                p.y = 4 * L2;  // cameraPathEllipRadius2 * sin(ang);
                p.z = 0;
                p.yaw = -90.0_deg -
                    30.0_deg * x / cameraPathLen;  // wrapToPi(ang+M_PI);
                p.pitch = 0;
                p.roll = 0;
                // Angles above is for +X pointing to the (0,0,0), but we want
                // instead +Z pointing there, as typical in camera models:
                frame_poses_real.push_back(
                    CPose3D(p) + CPose3D(0, 0, 0, -90.0_deg, 0, -90.0_deg));
            }

            // Simulate the feature observations:
            size_t numOutliers = 0;
            allObs.clear();
            map<TCameraPoseID, size_t> numViewedFrom;
            for (size_t i = 0; i < frame_poses_real.size();
                 i++)  // for each pose
            {
                // predict all landmarks:
                for (size_t j = 0; j < landmark_points_real.size(); j++)
                {
                    TPixelCoordf px =
                        mrpt::vision::pinhole::projectPoint_no_distortion<
                            false>(
                            camera_params, frame_poses_real[i],
                            landmark_points_real[j]);

                    const bool is_outlier =
                        (rg.drawUniform(0.0, 1.0) < PROBABILITY_OUTLIERS);
                    px.x += rg.drawGaussian1D(
                        0, is_outlier ? STD_PX_ERROR_OUTLIER : STD_PX_ERROR);
                    px.y += rg.drawGaussian1D(
                        0, is_outlier ? STD_PX_ERROR_OUTLIER : STD_PX_ERROR);

                    // Out of image?
                    if (px.x < 0 || px.y < 0 || px.x > camera_params.ncols ||
                        px.y > camera_params.nrows)
                        continue;

                    // Too far?
                    const double dist = math::distance(
                        TPoint3D(frame_poses_real[i].asTPose()),
                        landmark_points_real[j]);
                    if (dist > max_camera_dist) continue;

                    // Ok, accept it:
                    if (is_outlier) numOutliers++;
                    allObs.push_back(TFeatureObservation(j, i, px));
                    numViewedFrom[i]++;
                }
            }
            cout << "Simulated: " << allObs.size()
                 << " observations (of which: " << numOutliers
                 << " are outliers).\n";

            ASSERT_EQUAL_(numViewedFrom.size(), frame_poses_real.size());
            // Make sure all poses and all LMs appear at least once!
            {
                TSequenceFeatureObservations allObs2 = allObs;
                std::map<TCameraPoseID, TCameraPoseID> old2new_camIDs;
                std::map<TLandmarkID, TLandmarkID> old2new_lmIDs;
                allObs2.compressIDs(&old2new_camIDs, &old2new_lmIDs);

                ASSERT_EQUAL_(old2new_camIDs.size(), frame_poses_real.size());
                ASSERT_EQUAL_(
                    old2new_lmIDs.size(), landmark_points_real.size());
            }

            // Add noise to the data:
            frame_poses_noisy = frame_poses_real;
            landmark_points_noisy = landmark_points_real;
            for (size_t i = 0; i < landmark_points_noisy.size(); i++)
                landmark_points_noisy[i] += TPoint3D(
                    rg.drawGaussian1D(0, STD_PT3D),
                    rg.drawGaussian1D(0, STD_PT3D),
                    rg.drawGaussian1D(0, STD_PT3D));

            for (size_t i = 1; i < frame_poses_noisy.size();
                 i++)  // DON'T add error to frame[0], the global reference!
            {
                CPose3D bef = frame_poses_noisy[i];
                frame_poses_noisy[i].setFromValues(
                    frame_poses_noisy[i].x() +
                        rg.drawGaussian1D(0, STD_CAM_XYZ),
                    frame_poses_noisy[i].y() +
                        rg.drawGaussian1D(0, STD_CAM_XYZ),
                    frame_poses_noisy[i].z() +
                        rg.drawGaussian1D(0, STD_CAM_XYZ),
                    frame_poses_noisy[i].yaw() +
                        rg.drawGaussian1D(0, STD_CAM_ANG),
                    frame_poses_noisy[i].pitch() +
                        rg.drawGaussian1D(0, STD_CAM_ANG),
                    frame_poses_noisy[i].roll() +
                        rg.drawGaussian1D(0, STD_CAM_ANG));
            }

            // Optimize it:
            frame_poses = frame_poses_noisy;
            landmark_points = landmark_points_noisy;

#if 0
            vector<std::array<double,2> > resids;
            const double initial_total_sq_err = mrpt::vision::reprojectionResiduals(allObs,camera_params,frame_poses, landmark_points,resids, false);
            cout << "Initial avr error in px: " << std::sqrt(initial_total_sq_err/allObs.size()) << endl;
#endif

            // Run Bundle Adjustmen
            bundle_adj_full_demo(
                camera_params, allObs, frame_poses, landmark_points);

            // Evaluate vs. ground truth:
            double landmarks_total_sq_err = 0;
            for (size_t i = 0; i < landmark_points.size(); i++)
                landmarks_total_sq_err += square(
                    landmark_points_real[i].distanceTo(landmark_points[i]));

            double cam_point_total_sq_err = 0;
            for (size_t i = 0; i < frame_poses.size(); i++)
                cam_point_total_sq_err +=
                    square(frame_poses[i].distanceTo(frame_poses_real[i]));

            cout << "RMSE of recovered landmark positions: "
                 << std::sqrt(landmarks_total_sq_err / landmark_points.size())
                 << endl;
            cout << "RMSE of recovered camera positions: "
                 << std::sqrt(cam_point_total_sq_err / frame_poses.size())
                 << endl;
        }
        else
        {
            //  Real data
            // --------------------------
            if (argc == 3)
            {
                const string feats_fil = string(argv[1]);
                const string cam_fil = string(argv[2]);

                cout << "Loading observations from: " << feats_fil << "...";
                cout.flush();
                allObs.loadFromTextFile(feats_fil);
                cout << "Done.\n";

                allObs.decimateCameraFrames(20);
                allObs.compressIDs();

                ASSERT_(mrpt::system::fileExists(cam_fil));
                cout << "Loading camera params from: " << cam_fil;
                mrpt::config::CConfigFile cfgCam(cam_fil);
                camera_params.loadFromConfigFile("CAMERA", cfgCam);
                cout << "Done.\n";

                cout << "Initial gross estimate...";
                mrpt::vision::ba_initial_estimate(
                    allObs, camera_params, frame_poses, landmark_points);
                cout << "OK\n";
            }
            else
            {
                // Load data from 3 files in the same format as used by
                // "eucsbademo" in the SBA library:
                const string cam_frames_fil = string(argv[1]);
                const string obs_fil = string(argv[2]);
                const string calib_fil = string(argv[3]);

                {
                    cout << "Loading initial camera frames from: "
                         << cam_frames_fil << "...";
                    cout.flush();

                    mrpt::io::CTextFileLinesParser fil(cam_frames_fil);
                    frame_poses.clear();

                    std::istringstream ss;
                    while (fil.getNextLine(ss))
                    {
                        double q[4], t[3];
                        ss >> q[0] >> q[1] >> q[2] >> q[3] >> t[0] >> t[1] >>
                            t[2];
                        mrpt::poses::CPose3DQuat p(
                            t[0], t[1], t[2],
                            mrpt::math::CQuaternionDouble(
                                q[0], q[1], q[2], q[3]));
                        // cout << "cam: " << p << endl;
                        frame_poses.push_back(CPose3D(p));
                    }

                    cout << "Done. " << frame_poses.size()
                         << " cam frames loaded\n";

                    frame_poses_noisy = frame_poses;  // To draw in 3D the
                    // initial values as well.
                }

                {
                    cout << "Loading observations & feature 3D points from: "
                         << obs_fil << "...";
                    cout.flush();

                    mrpt::io::CTextFileLinesParser fil(obs_fil);
                    landmark_points.clear();
                    allObs.clear();

                    std::istringstream ss;
                    while (fil.getNextLine(ss))
                    {
                        // # X Y Z  nframes  frame0 x0 y0  frame1 x1 y1 ...
                        double t[3];
                        size_t N = 0;
                        ss >> t[0] >> t[1] >> t[2] >> N;

                        const TLandmarkID feat_id = landmark_points.size();
                        const TPoint3D pt(t[0], t[1], t[2]);
                        landmark_points.push_back(pt);

                        // Read obs:
                        for (size_t i = 0; i < N; i++)
                        {
                            TCameraPoseID frame_id;
                            TPixelCoordf px;
                            ss >> frame_id >> px.x >> px.y;
                            allObs.push_back(
                                TFeatureObservation(feat_id, frame_id, px));
                            // cout << "feat: " << feat_id << " cam: " <<
                            // frame_id << " px: " << px.x << "," << px.y <<
                            // endl;
                        }
                    }

                    cout << "Done. " << landmark_points.size() << " points, "
                         << allObs.size() << " observations read.\n";

                    landmark_points_real = landmark_points;  // To draw in 3D
                    // the initial
                    // values as well.
                }

                CMatrixDouble33 cam_pars;
                cam_pars.loadFromTextFile(calib_fil);

                // cout << "Calib:\n" << cam_pars << endl;

                camera_params.fx(cam_pars(0, 0));
                camera_params.fy(cam_pars(1, 1));
                camera_params.cx(cam_pars(0, 2));
                camera_params.cy(cam_pars(1, 2));

                cout << "camera calib:\n" << camera_params.dumpAsText() << endl;

                // Change world scale:
                WORLD_SCALE = 2000;
            }

            // Do it:
            bundle_adj_full_demo(
                camera_params, allObs, frame_poses, landmark_points);
        }

        // Display results in 3D:
        // -------------------------------
        gui::CDisplayWindow3D win("Bundle adjustment demo", 800, 600);

        Scene::Ptr& scene = win.get3DSceneAndLock();

        {  // Ground plane:
            auto obj = CGridPlaneXY::Create(-200, 200, -200, 200, 0, 5);
            obj->setColor(0.7, 0.7, 0.7);
            scene->insert(obj);
        }

        if (!landmark_points_real.empty())
        {  // Feature points: ground truth
            auto obj = CPointCloud::Create();
            obj->setPointSize(2);
            obj->setColor(0, 0, 0);
            obj->loadFromPointsList(landmark_points_real);
            obj->setScale(WORLD_SCALE);
            scene->insert(obj);
        }
        if (!landmark_points_noisy.empty())
        {  // Feature points: noisy
            auto obj = CPointCloud::Create();
            obj->setPointSize(4);
            obj->setColor(0.7, 0.2, 0.2, 0);
            obj->loadFromPointsList(landmark_points_noisy);
            obj->setScale(WORLD_SCALE);
            scene->insert(obj);
        }

        {  // Feature points: estimated
            auto obj = CPointCloud::Create();
            obj->setPointSize(3);
            obj->setColor(0, 0, 1, 1.0);
            obj->loadFromPointsList(landmark_points);
            obj->setScale(WORLD_SCALE);
            scene->insert(obj);
        }

        // Camera Frames: estimated
        scene->insert(framePosesVecVisualize(frame_poses_noisy, 1.0, 1));
        scene->insert(framePosesVecVisualize(frame_poses_real, 2.0, 1));
        scene->insert(framePosesVecVisualize(frame_poses, 2.0, 3));

        win.setCameraZoom(100);

        win.unlockAccess3DScene();
        win.repaint();

        // Also, show history of error:
        gui::CDisplayWindowPlots winPlot(
            "Avr log-error with iterations", 500, 200);
        // winPlot.setPos(0,620);
        winPlot.plot(history_avr_err, "b.3");
        winPlot.axis_fit();

        cout << "Close the 3D window or press a key to exit.\n";
        win.waitForKey();

        return 0;
    }
    catch (const std::exception& e)
    {
        std::cout << "MRPT exception caught: " << e.what() << std::endl;
        return -1;
    }
}

mrpt::opengl::CSetOfObjects::Ptr framePosesVecVisualize(
    const TFramePosesVec& poses, const double len, const double lineWidth)
{
    auto obj = mrpt::opengl::CSetOfObjects::Create();

    for (size_t i = 0; i < poses.size(); i++)
    {
        CSetOfObjects::Ptr corner =
            opengl::stock_objects::CornerXYZSimple(len, lineWidth);
        CPose3D p = poses[i];
        p.x(WORLD_SCALE * p.x());
        p.y(WORLD_SCALE * p.y());
        p.z(WORLD_SCALE * p.z());
        corner->setPose(p);
        corner->setName(format("%u", (unsigned int)i));
        corner->enableShowName();
        obj->insert(corner);
    }
    return obj;
}