### 编写高效的机器学习代码

`grad`

`weight = -eta * (grad + lambda * weight)`

。因此，我们可能会实现一个名为`UpdateWeight`

`weight`

```void UpdateWeight(const float *grad, float eta, float lambda,
int n, float *weight) {
for (int i = 0; i < n; ++i) {
weight[i] =  -eta * (grad[i] + lambda * weight[i]);
}
}```

`weight`

```void UpdateWeight(const Vec& grad, float eta, float lambda, Vec& weight) {
weight = -eta * (grad + lambda * weight);
}```

### 运算符重载

```// Naive solution for vector operation overloading
struct Vec {
int len;
float* dptr;
Vec(int len) : len(len) {
dptr = new float[len];
}
Vec(const Vec& src) : len(src.len) {
dptr = new float[len];
memcpy(dptr, src.dptr, sizeof(float)*len );
}
~Vec(void) {
delete [] dptr;
}
};
inline Vec operator+(const Vec &lhs, const Vec &rhs) {
Vec res(lhs.len);
for (int i = 0; i < lhs.len; ++i) {
res.dptr[i] = lhs.dptr[i] + rhs.dptr[i];
}
return res;
}```

### 延迟计算

，所以需要申请一块临时内存空间把结果保存下来。否则，如果我们能提前直到运算结果要存放在哪个变量中，那幺就可以直接将结果存储到相应的内存空间。下面的代码说明了这一情况：

```// Example Lazy evaluation code
// for simplicity, we use struct and make all members public
#include <cstdio>
struct Vec;
// expression structure holds the expression
const Vec &lhs;
const Vec &rhs;
BinaryAddExp(const Vec &lhs, const Vec &rhs)
: lhs(lhs), rhs(rhs) {}
};
// no constructor and destructor to allocate and de-allocate memory,
//  allocation done by user
struct Vec {
int len;
float* dptr;
Vec(void) {}
Vec(float *dptr, int len)
: len(len), dptr(dptr) {}
// here is where evaluation happens
inline Vec &operator=(const BinaryAddExp &src) {
for (int i = 0; i < len; ++i) {
dptr[i] = src.lhs.dptr[i] + src.rhs.dptr[i];
}
return *this;
}
};
// no evaluation happens here
inline BinaryAddExp operator+(const Vec &lhs, const Vec &rhs) {
}
const int n = 3;
int main(void) {
float sa[n] = {1, 2, 3};
float sb[n] = {2, 3, 4};
float sc[n] = {3, 4, 5};
Vec A(sa, n), B(sb, n), C(sc, n);
// run expression
A = B + C;
for (int i = 0; i < n; ++i) {
printf("%d:%f==%f+%f\
", i, A.dptr[i], B.dptr[i], C.dptr[i]);
}
return 0;
}```

，它里面保存了进行向量加法的两个操作数。当重载`operator=`

### 更复杂的表达式以及表达式模板

```// Example code, expression template, and more length equations
// for simplicity, we use struct and make all members public
#include <cstdio>
// this is expression, all expressions must inheritate it,
//  and put their type in subtype
template<typename SubType>
struct Exp {
// returns const reference of the actual type of this expression
inline const SubType& self(void) const {
return *static_cast<const SubType*>(this);
}
};
// note how it is inheritates from Exp
// and put its own type into the template argument
template<typename TLhs, typename TRhs>
const TLhs &lhs;
const TRhs &rhs;
BinaryAddExp(const TLhs& lhs, const TRhs& rhs)
: lhs(lhs), rhs(rhs) {}
// evaluation function, evaluate this expression at position i
inline float Eval(int i) const {
return lhs.Eval(i) + rhs.Eval(i);
}
};
// no constructor and destructor to allocate
// and de-allocate memory, allocation done by user
struct Vec: public Exp<Vec> {
int len;
float* dptr;
Vec(void) {}
Vec(float *dptr, int len)
:len(len), dptr(dptr) {}
// here is where evaluation happens
template<typename EType>
inline Vec& operator= (const Exp<EType>& src_) {
const EType &src = src_.self();
for (int i = 0; i < len; ++i) {
dptr[i] = src.Eval(i);
}
return *this;
}
// evaluation function, evaluate this expression at position i
inline float Eval(int i) const {
return dptr[i];
}
};
// template add, works for any expressions
template<typename TLhs, typename TRhs>
operator+(const Exp<TLhs> &lhs, const Exp<TRhs> &rhs) {
}
const int n = 3;
int main(void) {
float sa[n] = {1, 2, 3};
float sb[n] = {2, 3, 4};
float sc[n] = {3, 4, 5};
Vec A(sa, n), B(sb, n), C(sc, n);
// run expression, this expression is longer:)
A = B + C + C;
for (int i = 0; i < n; ++i) {
printf("%d:%f == %f + %f + %f\
", i,
A.dptr[i], B.dptr[i],
C.dptr[i], C.dptr[i]);
}
return 0;
}```

，这种模式被称为奇异递归模板模式，简称CRTP。`BinaryAddExp`

### 灵活性

```// Example code, expression template
// with binary operator definition and extension
// for simplicity, we use struct and make all members public
#include <cstdio>
// this is expression, all expressions must inheritate it,
// and put their type in subtype
template<typename SubType>
struct Exp{
// returns const reference of the actual type of this expression
inline const SubType& self(void) const {
return *static_cast<const SubType*>(this);
}
};
// binary operators
struct mul{
inline static float Map(float a, float b) {
return a * b;
}
};
// note how it is inheritates from Exp
// and put its own type into the template argument
template<typename OP, typename TLhs, typename TRhs>
struct BinaryMapExp: public Exp<BinaryMapExp<OP, TLhs, TRhs> >{
const TLhs& lhs;
const TRhs& rhs;
BinaryMapExp(const TLhs& lhs, const TRhs& rhs)
:lhs(lhs), rhs(rhs) {}
// evaluation function, evaluate this expression at position i
inline float Eval(int i) const {
return OP::Map(lhs.Eval(i), rhs.Eval(i));
}
};
// no constructor and destructor to allocate and de-allocate memory
// allocation done by user
struct Vec: public Exp<Vec>{
int len;
float* dptr;
Vec(void) {}
Vec(float *dptr, int len)
: len(len), dptr(dptr) {}
// here is where evaluation happens
template<typename EType>
inline Vec& operator=(const Exp<EType>& src_) {
const EType &src = src_.self();
for (int i = 0; i < len; ++i) {
dptr[i] = src.Eval(i);
}
return *this;
}
// evaluation function, evaluate this expression at position i
inline float Eval(int i) const {
return dptr[i];
}
};
// template binary operation, works for any expressions
template<typename OP, typename TLhs, typename TRhs>
inline BinaryMapExp<OP, TLhs, TRhs>
F(const Exp<TLhs>& lhs, const Exp<TRhs>& rhs) {
return BinaryMapExp<OP, TLhs, TRhs>(lhs.self(), rhs.self());
}
template<typename TLhs, typename TRhs>
inline BinaryMapExp<mul, TLhs, TRhs>
operator*(const Exp<TLhs>& lhs, const Exp<TRhs>& rhs) {
return F<mul>(lhs, rhs);
}
// user defined operation
struct maximum{
inline static float Map(float a, float b) {
return a > b ? a : b;
}
};
const int n = 3;
int main(void) {
float sa[n] = {1, 2, 3};
float sb[n] = {2, 3, 4};
float sc[n] = {3, 4, 5};
Vec A(sa, n), B(sb, n), C(sc, n);
// run expression, this expression is longer:)
A = B * F<maximum>(C, B);
for (int i = 0; i < n; ++i) {
printf("%d:%f == %f * max(%f, %f)\
",
i, A.dptr[i], B.dptr[i], C.dptr[i], B.dptr[i]);
}
return 0;
}```

，写起来就比较麻烦。其他的地方基本上与前一小节的代码差不多，稍微一看就能明白。

### 后记

C++11中引入了移动构造函数，可用于保存重复分配的内存，从而消除了一些需要用到表达式模板的情况。然而，内存空间仍然至少需要被分配一次。

This only removes the need of expression template then expression generate space, say`dst = A + B + C`
,`dst`
does not contain space allocated before assignment. （这句话没有理解它的意思，先把原文放这里吧）