UPDATE 2021
I’ve modified the benchmark code as follows:
- std::chrono used for timing measurements instead of boost
- C++11
<random>
used instead ofrand()
- Avoid repeated operations that can get hoisted out. The base parameter is ever-changing.
I get the following results with GCC 10 -O2 (in seconds):
exp c++ pow c pow x*x*x...
2 0.204243 1.39962 0.0902527
3 1.36162 1.38291 0.107679
4 1.37717 1.38197 0.106103
5 1.3815 1.39139 0.117097
GCC 10 -O3 is almost identical to GCC 10 -O2.
With GCC 10 -O2 -ffast-math:
exp c++ pow c pow x*x*x...
2 0.203625 1.4056 0.0913414
3 0.11094 1.39938 0.108027
4 0.201593 1.38618 0.101585
5 0.102141 1.38212 0.10662
With GCC 10 -O3 -ffast-math:
exp c++ pow c pow x*x*x...
2 0.0451995 1.175 0.0450497
3 0.0470842 1.20226 0.051399
4 0.0475239 1.18033 0.0473844
5 0.0522424 1.16817 0.0522291
With Clang 12 -O2:
exp c++ pow c pow x*x*x...
2 0.106242 0.105435 0.105533
3 1.45909 1.4425 0.102235
4 1.45629 1.44262 0.108861
5 1.45837 1.44483 0.1116
Clang 12 -O3 is almost identical to Clang 12 -O2.
With Clang 12 -O2 -ffast-math:
exp c++ pow c pow x*x*x...
2 0.0233731 0.0232457 0.0231076
3 0.0271074 0.0266663 0.0278415
4 0.026897 0.0270698 0.0268115
5 0.0312481 0.0296402 0.029811
Clang 12 -O3 -ffast-math is almost identical to Clang 12 -O2 -ffast-math.
Machine is Intel Core i7-7700K on Linux 5.4.0-73-generic x86_64.
Conclusions:
- With GCC 10 (no -ffast-math),
x*x*x...
is always faster - With GCC 10 -O2 -ffast-math,
std::pow
is as fast asx*x*x...
for odd exponents - With GCC 10 -O3 -ffast-math,
std::pow
is as fast asx*x*x...
for all test cases, and is around twice as fast as -O2. - With GCC 10, C’s
pow(double, double)
is always much slower - With Clang 12 (no -ffast-math),
x*x*x...
is faster for exponents greater than 2 - With Clang 12 -ffast-math, all methods produce similar results
- With Clang 12,
pow(double, double)
is as fast asstd::pow
for integral exponents - Writing benchmarks without having the compiler outsmart you is hard.
I’ll eventually get around to installing a more recent version of GCC on my machine and will update my results when I do so.
Here’s the updated benchmark code:
#include <cmath>
#include <chrono>
#include <iostream>
#include <random>
using Moment = std::chrono::high_resolution_clock::time_point;
using FloatSecs = std::chrono::duration<double>;
inline Moment now()
{
return std::chrono::high_resolution_clock::now();
}
#define TEST(num, expression) \
double test##num(double b, long loops) \
{ \
double x = 0.0; \
\
auto startTime = now(); \
for (long i=0; i<loops; ++i) \
{ \
x += expression; \
b += 1.0; \
} \
auto elapsed = now() - startTime; \
auto seconds = std::chrono::duration_cast<FloatSecs>(elapsed); \
std::cout << seconds.count() << "\t"; \
return x; \
}
TEST(2, b*b)
TEST(3, b*b*b)
TEST(4, b*b*b*b)
TEST(5, b*b*b*b*b)
template <int exponent>
double testCppPow(double base, long loops)
{
double x = 0.0;
auto startTime = now();
for (long i=0; i<loops; ++i)
{
x += std::pow(base, exponent);
base += 1.0;
}
auto elapsed = now() - startTime;
auto seconds = std::chrono::duration_cast<FloatSecs>(elapsed); \
std::cout << seconds.count() << "\t"; \
return x;
}
double testCPow(double base, double exponent, long loops)
{
double x = 0.0;
auto startTime = now();
for (long i=0; i<loops; ++i)
{
x += ::pow(base, exponent);
base += 1.0;
}
auto elapsed = now() - startTime;
auto seconds = std::chrono::duration_cast<FloatSecs>(elapsed); \
std::cout << seconds.count() << "\t"; \
return x;
}
int main()
{
using std::cout;
long loops = 100000000l;
double x = 0;
std::random_device rd;
std::default_random_engine re(rd());
std::uniform_real_distribution<double> dist(1.1, 1.2);
cout << "exp\tc++ pow\tc pow\tx*x*x...";
cout << "\n2\t";
double b = dist(re);
x += testCppPow<2>(b, loops);
x += testCPow(b, 2.0, loops);
x += test2(b, loops);
cout << "\n3\t";
b = dist(re);
x += testCppPow<3>(b, loops);
x += testCPow(b, 3.0, loops);
x += test3(b, loops);
cout << "\n4\t";
b = dist(re);
x += testCppPow<4>(b, loops);
x += testCPow(b, 4.0, loops);
x += test4(b, loops);
cout << "\n5\t";
b = dist(re);
x += testCppPow<5>(b, loops);
x += testCPow(b, 5.0, loops);
x += test5(b, loops);
std::cout << "\n" << x << "\n";
}
Old Answer, 2010
I tested the performance difference between x*x*...
vs pow(x,i)
for small i
using this code:
#include <cstdlib>
#include <cmath>
#include <boost/date_time/posix_time/posix_time.hpp>
inline boost::posix_time::ptime now()
{
return boost::posix_time::microsec_clock::local_time();
}
#define TEST(num, expression) \
double test##num(double b, long loops) \
{ \
double x = 0.0; \
\
boost::posix_time::ptime startTime = now(); \
for (long i=0; i<loops; ++i) \
{ \
x += expression; \
x += expression; \
x += expression; \
x += expression; \
x += expression; \
x += expression; \
x += expression; \
x += expression; \
x += expression; \
x += expression; \
} \
boost::posix_time::time_duration elapsed = now() - startTime; \
\
std::cout << elapsed << " "; \
\
return x; \
}
TEST(1, b)
TEST(2, b*b)
TEST(3, b*b*b)
TEST(4, b*b*b*b)
TEST(5, b*b*b*b*b)
template <int exponent>
double testpow(double base, long loops)
{
double x = 0.0;
boost::posix_time::ptime startTime = now();
for (long i=0; i<loops; ++i)
{
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
x += std::pow(base, exponent);
}
boost::posix_time::time_duration elapsed = now() - startTime;
std::cout << elapsed << " ";
return x;
}
int main()
{
using std::cout;
long loops = 100000000l;
double x = 0.0;
cout << "1 ";
x += testpow<1>(rand(), loops);
x += test1(rand(), loops);
cout << "\n2 ";
x += testpow<2>(rand(), loops);
x += test2(rand(), loops);
cout << "\n3 ";
x += testpow<3>(rand(), loops);
x += test3(rand(), loops);
cout << "\n4 ";
x += testpow<4>(rand(), loops);
x += test4(rand(), loops);
cout << "\n5 ";
x += testpow<5>(rand(), loops);
x += test5(rand(), loops);
cout << "\n" << x << "\n";
}
Results are:
1 00:00:01.126008 00:00:01.128338
2 00:00:01.125832 00:00:01.127227
3 00:00:01.125563 00:00:01.126590
4 00:00:01.126289 00:00:01.126086
5 00:00:01.126570 00:00:01.125930
2.45829e+54
Note that I accumulate the result of every pow calculation to make sure the compiler doesn’t optimize it away.
If I use the std::pow(double, double)
version, and loops = 1000000l
, I get:
1 00:00:00.011339 00:00:00.011262
2 00:00:00.011259 00:00:00.011254
3 00:00:00.975658 00:00:00.011254
4 00:00:00.976427 00:00:00.011254
5 00:00:00.973029 00:00:00.011254
2.45829e+52
This is on an Intel Core Duo running Ubuntu 9.10 64bit. Compiled using gcc 4.4.1 with -o2 optimization.
So in C, yes x*x*x
will be faster than pow(x, 3)
, because there is no pow(double, int)
overload. In C++, it will be the roughly same. (Assuming the methodology in my testing is correct.)
This is in response to the comment made by An Markm:
Even if a using namespace std
directive was issued, if the second parameter to pow
is an int
, then the std::pow(double, int)
overload from <cmath>
will be called instead of ::pow(double, double)
from <math.h>
.
This test code confirms that behavior:
#include <iostream>
namespace foo
{
double bar(double x, int i)
{
std::cout << "foo::bar\n";
return x*i;
}
}
double bar(double x, double y)
{
std::cout << "::bar\n";
return x*y;
}
using namespace foo;
int main()
{
double a = bar(1.2, 3); // Prints "foo::bar"
std::cout << a << "\n";
return 0;
}