Performance




#include <utility>

// X owns a heap-allocated array of _size ints.
// It exists to compare the cost of a deep copy with the cost of a move.
class X
{
public:
  // Allocates sz_ ints; the elements are left uninitialized.
  X(int sz_);
  // Deep copy: allocates a fresh array and copies every element of rhs_.
  X(const X& rhs_);
  // Move: takes over rhs_'s array and leaves rhs_ empty (size 0, null pointer).
  // Declared noexcept so that standard containers may move instead of copy.
  X(X&& rhs_) noexcept;
  // Releases the owned array (a null pointer is fine).
  ~X();
  // Assignment is not supported. Declaring the move constructor already
  // suppresses the implicit assignment operators; this spells it out.
  X& operator=(const X& rhs_) = delete;
  X& operator=(X&& rhs_) = delete;
private:
  int _size;  // declared before _v: the initializers below rely on this order
  int *_v;
};

X::X(int sz_) : _size(sz_), _v(new int[_size])
{
  // the contents of _v[] are deliberately left uninitialized
}

X::X(const X& rhs_) : _size(rhs_._size), _v(new int[_size])
{
  for(int i = 0; i < _size; ++i)
    _v[i] = rhs_._v[i];
}

X::X(X&& rhs_) noexcept : _size(rhs_._size), _v(rhs_._v)
{
  // Detach the buffer from the source so that its destructor does not free it.
  rhs_._size = 0;
  rhs_._v = nullptr;
}

X::~X()
{
  delete [] _v;
}


// with copy
int main()
{
  for( int i = 0; i < 1000; ++i)
  {
    X x(1000000);
    X y(x);
    //X y(std::move(x));
  }
  return 0;
}

$ time ./a.out
real    0m18.324s
user    0m8.201s
sys     0m5.728s



// with move
int main()
{
  for( int i = 0; i < 1000; ++i)
  {
    X x(1000000);
    //X y(x);
    X y(std::move(x));
  }
  return 0;
}

$ time ./a.out
real    0m4.667s
user    0m0.700s
sys     0m2.980s



// with common sense
int main()
{
  X x(1000000);
  for( int i = 0; i < 1000; ++i)
  {
    X y(x);
    //X y(std::move(x));
  }
  return 0;
}

$ time ./a.out
real    0m11.662s
user    0m7.528s
sys     0m3.188s



// being too greedy :)
int main()
{
  X x(1000000);
  for( int i = 0; i < 1000; ++i)
  {
    //X y(x);
    X y(std::move(x)); // bad: further y's will be empty...
  }
  return 0;
}


// ============== another version: slow down the copy =======

// Deliberately slow deep copy: every element is stored `repeats` times
// so that the cost of a single copy becomes measurable.
X::X(const X& rhs_) : _size(rhs_._size), _v(new int[_size])
{
  const int repeats = 1000;
  for (int idx = 0; idx != _size; ++idx)
  {
    for (int rep = 0; rep != repeats; ++rep)
    {
      _v[idx] = rhs_._v[idx];
    }
  }
}


int main()
{
  X x(1000000);
  X y(x);
  //  X y(std::move(x)); 
  return 0;
}

$ g++ -std=c++0x -pedantic -Wall r.cpp

time ./a.out
real    0m6.545s
user    0m6.124s
sys     0m0.028s

$ g++ -O3 -std=c++0x -pedantic -Wall r.cpp

time ./a.out
real    0m2.024s
user    0m1.840s
sys     0m0.024s


int main()
{
  X x(1000000);
  // X y(x);
  X y(std::move(x));
  return 0;
}

$ g++ -std=c++0x -pedantic -Wall r.cpp
time ./a.out
real    0m0.015s
user    0m0.004s
sys     0m0.004s

$ g++ -O3 -std=c++0x -pedantic -Wall r.cpp
time ./a.out
real    0m0.015s
user    0m0.000s
sys     0m0.012s


// ================== another version: returning a local ===================

// Builds a ten-million-element X and returns it by name, which lets the
// compiler elide the copy/move of the returned object.
X f()
{
  const int elements = 10000000;
  X local(elements);
  return local;
  // return std::move(local);
}

int main()
{
  // Initialize the result directly from the value returned by f().
  X result(f());
  return 0;
}

$ time ./a.out
real    0m0.043s
user    0m0.020s
sys     0m0.016s

// Builds a ten-million-element X and returns it through std::move.
// NOTE: the explicit std::move forces the move constructor to run and
// keeps the compiler from eliding it; it is kept here only to compare
// against the plain `return` variant.
X f()
{
  const int elements = 10000000;
  X local(elements);
  // return local;
  return std::move(local);
}

int main()
{
  // Initialize the result directly from the value returned by f().
  X result(f());
  return 0;
}

$ time ./a.out
real    0m0.039s
user    0m0.016s
sys     0m0.016s



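There is almost no difference, because modern compilers apply RVO
(return value optimization; for a named local such as x it is called
NRVO). That means the local x inside f() is constructed directly in
the storage of y, so neither a copy nor a move takes place.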

In some cases, performance is even worse when using std::move(),
because it prevents the compiler from eliding the copy/move altogether!

David Abrahams has an article on this:
http://cpp-next.com/archive/2009/08/want-speed-pass-by-value/