diff --git a/model_test.go b/model_test.go
index fc3e246..2eaf437 100644
--- a/model_test.go
+++ b/model_test.go
@@ -57,7 +57,7 @@ func TestSequential(t *testing.T) {
 	if err != nil {
 		t.Fatalf("Failed to create weight matrix: %v", err)
 	}
-	bias, err := gomatrix.NewMatrix([]float64{0.5, 0.5}, []int{2})
+	bias, err := gomatrix.NewMatrix([]float64{0.5, 0.5}, []int{1, 2}) // changed to a 1x2 matrix to match the output shape
 	if err != nil {
 		t.Fatalf("Failed to create bias vector: %v", err)
 	}
@@ -69,7 +69,7 @@ func TestSequential(t *testing.T) {
 		},
 		Bias: &Tensor{
 			Data: bias,
-			Grad: Must(gomatrix.NewVector([]float64{0, 0})),
+			Grad: Must(gomatrix.NewMatrix([]float64{0, 0}, []int{1, 2})),
 		},
 	}
@@ -79,7 +79,13 @@ func TestSequential(t *testing.T) {
 	}

 	// Test forward propagation
-	input := Must(NewVector([]float64{1, 1}))
+	inputData, err := gomatrix.NewMatrix([]float64{1, 1}, []int{1, 2})
+	if err != nil {
+		t.Fatalf("Failed to create input vector: %v", err)
+	}
+	input := &Tensor{
+		Data: inputData,
+	}
 	output, err := seq.Forward(input)
 	if err != nil {
 		t.Errorf("Sequential forward failed: %v", err)
@@ -121,9 +127,9 @@ func TestSaveLoadModel(t *testing.T) {
 	if err != nil {
 		t.Fatalf("Failed to create weight matrix: %v", err)
 	}
-	bias, err := gomatrix.NewVector([]float64{0.5, 0.5})
+	bias, err := gomatrix.NewMatrix([]float64{0.5, 0.5}, []int{1, 2}) // changed to a 1x2 matrix to match the output shape
 	if err != nil {
-		t.Fatalf("Failed to create bias vector: %v", err)
+		t.Fatalf("Failed to create bias matrix: %v", err)
 	}

 	linearLayer := &Linear{
@@ -133,7 +139,7 @@ func TestSaveLoadModel(t *testing.T) {
 		},
 		Bias: &Tensor{
 			Data: bias,
-			Grad: Must(gomatrix.NewVector([]float64{0, 0})),
+			Grad: Must(gomatrix.NewMatrix([]float64{0, 0}, []int{1, 2})),
 		},
 	}
@@ -169,21 +175,42 @@
 // TestLinearLayer tests the linear layer functionality
 func TestLinearLayer(t *testing.T) {
-	weight := Must(NewTensor([]float64{2, 0, 0, 3}, []int{2, 2}))
-	bias := Must(NewVector([]float64{0.5, 0.5}))
+	weightData, err := gomatrix.NewMatrix([]float64{2, 0, 0, 3}, []int{2, 2})
+	if err != nil {
+		t.Fatalf("Failed to create weight matrix: %v", err)
+	}
+	weight := &Tensor{
+		Data: weightData,
+		Grad: Must(gomatrix.NewMatrix([]float64{0, 0, 0, 0}, []int{2, 2})),
+	}
+
+	biasData, err := gomatrix.NewMatrix([]float64{0.5, 0.5}, []int{1, 2}) // changed to a 1x2 matrix
+	if err != nil {
+		t.Fatalf("Failed to create bias vector: %v", err)
+	}
+	bias := &Tensor{
+		Data: biasData,
+		Grad: Must(gomatrix.NewMatrix([]float64{0, 0}, []int{1, 2})), // grad shape kept consistent with the 1x2 bias
+	}

 	layer := NewLinear(weight, bias)

 	// Test forward propagation
-	input := Must(NewVector([]float64{1, 1}))
+	inputData, err := gomatrix.NewMatrix([]float64{1, 1}, []int{1, 2})
+	if err != nil {
+		t.Fatalf("Failed to create input vector: %v", err)
+	}
+	input := &Tensor{
+		Data: inputData,
+	}
 	output, err := layer.Forward(input)
 	if err != nil {
 		t.Fatalf("Linear layer forward failed: %v", err)
 	}

-	// Expected output: weight * input + bias = [[2,0],[0,3]] * [1,1] + [0.5,0.5] = [2.5,3.5]
-	expected0, _ := output.Data.Get(0)
-	expected1, _ := output.Data.Get(1)
+	// Expected output: input * weight^T + bias = [1,1] * [[2,0],[0,3]]^T + [0.5,0.5] = [2,3] + [0.5,0.5] = [2.5,3.5]
+	expected0, _ := output.Data.Get(0, 0)
+	expected1, _ := output.Data.Get(0, 1)

 	if math.Abs(expected0-2.5) > 1e-9 {
 		t.Errorf("Expected output[0] to be 2.5, got %v", expected0)
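A note on the shape changes above: the tests now build inputs as 1x2 row matrices, so Forward produces a 1x2 output and the bias (and its gradient) must be 1x2 as well. Below is a minimal plain-Go sketch, independent of gomatrix, of the arithmetic the TestLinearLayer assertions encode (variable names are ours):

```go
package main

import "fmt"

// y = x * W^T + b for a single 1x2 row vector x.
func main() {
	x := []float64{1, 1}
	w := [][]float64{{2, 0}, {0, 3}}
	b := []float64{0.5, 0.5}

	y := make([]float64, 2)
	for j := range y {
		for k := range x {
			y[j] += x[k] * w[j][k] // row j of W is column j of W^T
		}
		y[j] += b[j]
	}
	fmt.Println(y) // [2.5 3.5] — the values the test checks at (0,0) and (0,1)
}
```

Here W is diagonal, so W^T = W and the expected output is [2.5, 3.5] either way.
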
diff --git a/optimizer.go b/optimizer.go
index 15451ad..b181c62 100644
--- a/optimizer.go
+++ b/optimizer.go
@@ -164,18 +164,38 @@ func (a *Adam) Step() {
 		// Compute the bias-corrected first moment estimate
 		mHatData := make([]float64, param.Size())
 		mHatShape := shape
-		for idx := 0; idx < param.Size(); idx++ {
-			mVal, _ := newM.Data.Get(idx)
-			mHatData[idx] = mVal / (1 - math.Pow(a.Beta1, float64(a.T)))
+		if len(shape) == 1 {
+			for idx := 0; idx < shape[0]; idx++ {
+				mVal, _ := newM.Data.Get(idx)
+				mHatData[idx] = mVal / (1 - math.Pow(a.Beta1, float64(a.T)))
+			}
+		} else if len(shape) == 2 {
+			rows, cols := shape[0], shape[1]
+			for r := 0; r < rows; r++ {
+				for c := 0; c < cols; c++ {
+					mVal, _ := newM.Data.Get(r, c)
+					mHatData[r*cols+c] = mVal / (1 - math.Pow(a.Beta1, float64(a.T)))
+				}
+			}
 		}
 		mHat, _ := NewTensor(mHatData, mHatShape)

 		// Compute the bias-corrected second moment estimate
 		vHatData := make([]float64, param.Size())
 		vHatShape := shape
-		for idx := 0; idx < param.Size(); idx++ {
-			vVal, _ := newV.Data.Get(idx)
-			vHatData[idx] = vVal / (1 - math.Pow(a.Beta2, float64(a.T)))
+		if len(shape) == 1 {
+			for idx := 0; idx < shape[0]; idx++ {
+				vVal, _ := newV.Data.Get(idx)
+				vHatData[idx] = vVal / (1 - math.Pow(a.Beta2, float64(a.T)))
+			}
+		} else if len(shape) == 2 {
+			rows, cols := shape[0], shape[1]
+			for r := 0; r < rows; r++ {
+				for c := 0; c < cols; c++ {
+					vVal, _ := newV.Data.Get(r, c)
+					vHatData[r*cols+c] = vVal / (1 - math.Pow(a.Beta2, float64(a.T)))
+				}
+			}
 		}
 		vHat, _ := NewTensor(vHatData, vHatShape)
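The optimizer.go change branches on shape only because gomatrix indexes 1-D values as Get(idx) and 2-D values as Get(r, c); both branches compute the same standard Adam bias correction, mHat = m / (1 - beta1^t) and vHat = v / (1 - beta2^t). A minimal sketch of that correction over a flat slice (function name is ours); note the divisor does not depend on the element index, so it could equally be hoisted out of the patched loops:

```go
package main

import (
	"fmt"
	"math"
)

// biasCorrect divides every entry of a moment estimate by (1 - beta^t).
func biasCorrect(moment []float64, beta float64, t int) []float64 {
	correction := 1 - math.Pow(beta, float64(t)) // same divisor for all entries
	out := make([]float64, len(moment))
	for i, v := range moment {
		out[i] = v / correction
	}
	return out
}

func main() {
	// After one step with beta1 = 0.9, the correction is 1/(1-0.9) = 10.
	fmt.Println(biasCorrect([]float64{0.01, 0.02}, 0.9, 1)) // [0.1 0.2]
}
```
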
diff --git a/optimizer_test.go b/optimizer_test.go
index 4f32cc9..a61ec30 100644
--- a/optimizer_test.go
+++ b/optimizer_test.go
@@ -1,53 +1,63 @@
 package gotensor

 import (
-	"testing"
 	"math"
+	"testing"
+
+	"git.kingecg.top/kingecg/gomatrix"
 )

+// NewMatrix is a test helper: it flattens a [][]float64 row-major and builds
+// a gomatrix.Matrix with the corresponding shape.
+func NewMatrix(data [][]float64) (*gomatrix.Matrix, error) {
+	c := make([]float64, len(data)*len(data[0]))
+	for i := 0; i < len(c); i++ {
+		c[i] = data[i/len(data[0])][i%len(data[0])]
+	}
+	return gomatrix.NewMatrix(c, []int{len(data), len(data[0])})
+}
+
 // TestSGD tests the SGD optimizer
 func TestSGD(t *testing.T) {
 	// Create some parameters for testing
 	weightData, _ := NewMatrix([][]float64{{1.0, 2.0}, {3.0, 4.0}})
 	weightGrad, _ := NewMatrix([][]float64{{0.1, 0.2}, {0.3, 0.4}})
-
+
 	params := []*Tensor{
 		{
-			Data: Must(NewVector([]float64{1.0, 2.0, 3.0})),
-			Grad: Must(NewVector([]float64{0.1, 0.2, 0.3})),
+			Data: Must(gomatrix.NewMatrix([]float64{1.0, 2.0, 3.0}, []int{3, 1})),
+			Grad: Must(gomatrix.NewMatrix([]float64{0.1, 0.2, 0.3}, []int{3, 1})),
 		},
 		{
 			Data: weightData,
 			Grad: weightGrad,
 		},
 	}
-
+
 	// Create the SGD optimizer
 	lr := 0.1
 	sgd := NewSGD(params, lr)
-
+
 	// Save the original parameter values
-	origVec0, _ := params[0].Data.Get(0)
+	origVec0, _ := params[0].Data.Get(0, 0)
 	origMat00, _ := params[1].Data.Get(0, 0)
-
+
 	// Run one optimization step
 	sgd.Step()
-
+
 	// Check that the parameters were updated
-	newVec0, _ := params[0].Data.Get(0)
+	newVec0, _ := params[0].Data.Get(0, 0)
 	newMat00, _ := params[1].Data.Get(0, 0)
-
+
 	expectedVec0 := origVec0 - lr*0.1
 	expectedMat00 := origMat00 - lr*0.1
-
+
 	if math.Abs(newVec0-expectedVec0) > 1e-9 {
 		t.Errorf("Expected updated param[0][0] to be %v, got %v", expectedVec0, newVec0)
 	}
-
+
 	if math.Abs(newMat00-expectedMat00) > 1e-9 {
 		t.Errorf("Expected updated param[1][0,0] to be %v, got %v", expectedMat00, newMat00)
 	}
-
+
 	// Test ZeroGrad
 	sgd.ZeroGrad()
 	for _, param := range params {
@@ -72,38 +82,38 @@ func TestAdam(t *testing.T) {
 	// Create some parameters for testing
 	params := []*Tensor{
 		{
-			Data: Must(NewVector([]float64{1.0, 2.0})),
-			Grad: Must(NewVector([]float64{0.1, 0.2})),
+			Data: Must(gomatrix.NewMatrix([]float64{1.0, 2.0}, []int{2, 1})),
+			Grad: Must(gomatrix.NewMatrix([]float64{0.1, 0.2}, []int{2, 1})),
 		},
 	}
-
+
 	// Create the Adam optimizer
 	lr := 0.001
 	beta1 := 0.9
 	beta2 := 0.999
 	epsilon := 1e-8
 	adam := NewAdam(params, lr, beta1, beta2, epsilon)
-
+
 	// Save the original parameter value
-	origVec0, _ := params[0].Data.Get(0)
-
+	origVec0, _ := params[0].Data.Get(0, 0)
+
 	// Run a few optimization steps
 	for i := 0; i < 3; i++ {
 		adam.Step()
 	}
-
+
 	// Check that the parameter was updated
-	newVec0, _ := params[0].Data.Get(0)
-
+	newVec0, _ := params[0].Data.Get(0, 0)
+
 	if math.Abs(newVec0-origVec0) < 1e-9 {
 		t.Errorf("Expected parameter to be updated, but it wasn't. Original: %v, New: %v", origVec0, newVec0)
 	}
-
+
 	// Verify the internal state was created
 	if len(adam.M) != len(params) || len(adam.V) != len(params) {
 		t.Error("Adam internal states M and V not properly initialized")
 	}
-
+
 	// Test ZeroGrad
 	adam.ZeroGrad()
 	for _, param := range params {
@@ -127,7 +137,7 @@
 func TestAdamWithMatrix(t *testing.T) {
 	matrixData, _ := NewMatrix([][]float64{{1.0, 2.0}, {3.0, 4.0}})
 	matrixGrad, _ := NewMatrix([][]float64{{0.1, 0.2}, {0.3, 0.4}})
-
+
 	// Create matrix parameters for testing
 	params := []*Tensor{
 		{
@@ -135,23 +145,65 @@ func TestAdamWithMatrix(t *testing.T) {
 			Grad: matrixGrad,
 		},
 	}
-
+
 	// Create the Adam optimizer
 	lr := 0.001
 	adam := NewAdam(params, lr, 0.9, 0.999, 1e-8)
-
-	// Save the original parameter value
-	origMat00, _ := params[0].Data.Get(0, 0)
-
+
+	// Verify the internal state was initialized correctly
+	if len(adam.M) != len(params) || len(adam.V) != len(params) {
+		t.Fatalf("Adam internal states M and V not properly initialized. Expected %d states, got M:%d, V:%d",
+			len(params), len(adam.M), len(adam.V))
+	}
+
+	// Verify the internal state matrices match the parameter shape
+	mShape := adam.M[0]["tensor"].Shape()
+	vShape := adam.V[0]["tensor"].Shape()
+	paramShape := params[0].Shape()
+	if mShape[0] != paramShape[0] || mShape[1] != paramShape[1] ||
+		vShape[0] != paramShape[0] || vShape[1] != paramShape[1] {
+		t.Errorf("Adam internal state shapes don't match parameter shape. "+
+			"Param: %v, M: %v, V: %v", paramShape, mShape, vShape)
+	}
+
+	// Save a copy of the original parameter values
+	originalData := make([][]float64, paramShape[0])
+	for i := 0; i < paramShape[0]; i++ {
+		originalData[i] = make([]float64, paramShape[1])
+		for j := 0; j < paramShape[1]; j++ {
+			originalData[i][j], _ = params[0].Data.Get(i, j)
+		}
+	}
+
 	// Run a few optimization steps
 	for i := 0; i < 5; i++ {
 		adam.Step()
 	}
-
-	// Check that the parameter was updated
-	newMat00, _ := params[0].Data.Get(0, 0)
-
-	if math.Abs(newMat00-origMat00) < 1e-9 {
-		t.Errorf("Expected parameter to be updated, but it wasn't. Original: %v, New: %v", origMat00, newMat00)
+
+	// Check that at least one parameter entry was updated,
+	// breaking out of both loops on the first change found
+	updated := false
+outer:
+	for i := 0; i < paramShape[0]; i++ {
+		for j := 0; j < paramShape[1]; j++ {
+			newVal, _ := params[0].Data.Get(i, j)
+			if math.Abs(newVal-originalData[i][j]) > 1e-9 {
+				updated = true
+				break outer
+			}
+		}
 	}
-}
\ No newline at end of file
+
+	if !updated {
+		t.Errorf("Expected parameters to be updated, but none were changed")
+	}
+
+	// Additionally verify the update is sensible: parameters should move in
+	// the direction opposite to the gradient
+	firstOrig := originalData[0][0]
+	firstNew, _ := params[0].Data.Get(0, 0)
+	firstGrad, _ := params[0].Grad.Get(0, 0)
+
+	if (firstNew-firstOrig)*firstGrad > 0 {
+		t.Errorf("Parameter updated in wrong direction. delta=%v, grad=%v",
+			firstNew-firstOrig, firstGrad)
+	}
+}
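The new NewMatrix helper in optimizer_test.go relies on row-major index arithmetic: element (i, j) of the nested slice lands at flat index i*cols + j. A quick standalone check of that mapping (plain Go, function name is ours):

```go
package main

import "fmt"

// flatten mirrors the index arithmetic of the NewMatrix test helper.
func flatten(data [][]float64) []float64 {
	cols := len(data[0])
	c := make([]float64, len(data)*cols)
	for i := range c {
		c[i] = data[i/cols][i%cols] // i/cols is the row, i%cols the column
	}
	return c
}

func main() {
	fmt.Println(flatten([][]float64{{1, 2}, {3, 4}})) // [1 2 3 4]
}
```

This is the same mapping the patched Adam loops use in reverse, where a 2-D index (r, c) is written to flat offset r*cols+c.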