% 2013-08-19 Magnus Burenius

% Loads the 2D annotation and does 3D reconstruction of joints and cameras.
% The reconstructed camera matrices for all frames are saved to cameras.txt.
% The reconstructed 3D joint positions are saved to positions3d.txt.

% The cameras are calibrated such that all reconstructed 3D joint positions
% are within the unit cube. We assume that the dynamic cameras follow a
% person which moves around in a volume that is small relative its distance
% to the cameras. We can then use the ortographic camera model and assume
% that the 3D rotation of each camera is approximately static. The motion
% of the cameras are modelled as translations in 2D. For more details
% please see the paper:
% "Motion Capture from Dynamic Orthographic Cameras"
% by Burenius, Sullivan, Carlsson
% in ICCV Workshop on Dynamic Shape Capture and Analysis 2011.

% If a more accurate 3D reconstruction is required one could use this as an
% initial solution in a bundle-adjustment optimization, which could impose:
% * Dynamic but smooth 3D rotations for the cameras.
% * Limb length constraints.
% * Joint angle constraints.

clear all
close all

CAMERAS = 3;
LANDMARKS = 14;

dataDirs = { '../Player 1/Sequence 1/';
             '../Player 1/Sequence 2/';
             '../Player 2/Sequence 1/';
             '../Player 2/Sequence 2/' };

for d = 1:length(dataDirs)

    fprintf( '\nReconstructing %s\n', dataDirs{d} )
    
    % Load 2D annotation:
    x = load( [dataDirs{d} 'positions2d.txt'] );
    FRAMES = length(x) / 2 / LANDMARKS / CAMERAS;
    positions2d = reshape( x, 2, LANDMARKS, CAMERAS, FRAMES );

    % Make the 2D positions have zero-mean for every frame and camera:
    meanPos = mean( positions2d, 2 );
    positions2dZeroMean = positions2d;
    for n=1:LANDMARKS
        positions2dZeroMean(:,n,:,:) = positions2dZeroMean(:,n,:,:) - meanPos;
    end

    % Create measurement matrix W of size (2*C)x(LANDMARKS*FRAMES):
    W1 = reshape( positions2dZeroMean(:,:,1,:), 2, LANDMARKS*FRAMES );
    W2 = reshape( positions2dZeroMean(:,:,2,:), 2, LANDMARKS*FRAMES );
    W3 = reshape( positions2dZeroMean(:,:,3,:), 2, LANDMARKS*FRAMES );
    W = [W1;W2;W3];

    % Reconstruct camera matrices represented by the 2Cx3 matrix M and
    % 3D points represented by the 3x(LANDMARKS*FRAMES) matrix X:
    [M X] = AffineFactorization( W );
    [M X] = Rectify( M, X );
    [M X] = Reflect( M, X );
    [M X] = RotateReconstruction( M, X );
    [M X] = ScaleToUnitCube( M, X );

    residual = W-M*X;
    error = sqrt( sum(residual(:).^2) / LANDMARKS / FRAMES / CAMERAS );
    fprintf( 'Re-projection error: %.0f pixels \n', error )
    fprintf( 'per landmark per frame per camera.\n' )

    % Re-structure format:
    positions3d = reshape( X, 3, LANDMARKS, FRAMES );
    cameraMatrices = zeros( 2, 4, CAMERAS, FRAMES );
    for c=1:CAMERAS
        cameraMatrices(:,1:3,c,:) = repmat( M(2*c-1 : 2*c,:), [1 1 FRAMES] );
        cameraMatrices(:,  4,c,:) = meanPos(:,:,c,:);
    end

    % Save 3D reconstruction to 'positions3d.txt' and 'cameras.txt':
    p = positions3d(:);
    c = cameraMatrices(:);
    save( [dataDirs{d} 'positions3d.txt'], 'p', '-ascii' );
    save( [dataDirs{d}     'cameras.txt'], 'c', '-ascii' );

end