diff --git a/examples/advanced_optimizer/advanced_optimizer_example.go b/examples/advanced_optimizer/advanced_optimizer_example.go
new file mode 100644
index 0000000..1e89c45
--- /dev/null
+++ b/examples/advanced_optimizer/advanced_optimizer_example.go
@@ -0,0 +1,218 @@
+package main
+
+import (
+	"fmt"
+
+	"git.kingecg.top/kingecg/gotensor"
+)
+
+// LinearLayer is a simple linear (fully connected) layer.
+type LinearLayer struct {
+	Weight *gotensor.Tensor
+	Bias   *gotensor.Tensor
+}
+
+// NewLinearLayer creates a new linear layer.
+// Note: the hard-coded initial values assume inputSize == outputSize == 2.
+func NewLinearLayer(inputSize, outputSize int) *LinearLayer {
+	// Initialize the weights and bias.
+	weight, _ := gotensor.NewTensor([]float64{
+		0.1, 0.2,
+		0.3, 0.4,
+	}, []int{outputSize, inputSize})
+
+	bias, _ := gotensor.NewTensor([]float64{0.1, 0.1}, []int{outputSize})
+
+	return &LinearLayer{
+		Weight: weight,
+		Bias:   bias,
+	}
+}
+
+func (l *LinearLayer) Forward(inputs *gotensor.Tensor) (*gotensor.Tensor, error) {
+	// Linear transformation: output = inputs * weight^T + bias
+	weightTransposed, err := l.Weight.Data.Transpose()
+	if err != nil {
+		return nil, err
+	}
+
+	// Wrap the transposed weights in a tensor. For the square 2x2 layers
+	// used here, the transposed shape equals the original shape.
+	weightTransposedTensor := &gotensor.Tensor{
+		Data: weightTransposed,
+		Grad: must(gotensor.NewZeros(l.Weight.Shape())),
+	}
+
+	mulResult, err := inputs.MatMul(weightTransposedTensor)
+	if err != nil {
+		return nil, err
+	}
+
+	output, err := mulResult.Add(l.Bias)
+	if err != nil {
+		return nil, err
+	}
+
+	return output, nil
+}
+
+func (l *LinearLayer) Parameters() []*gotensor.Tensor {
+	return []*gotensor.Tensor{l.Weight, l.Bias}
+}
+
+func (l *LinearLayer) ZeroGrad() {
+	l.Weight.ZeroGrad()
+	l.Bias.ZeroGrad()
+}
+
+// SimpleModel is a minimal model wrapping a single linear layer.
+type SimpleModel struct {
+	Layer *LinearLayer
+}
+
+func (m *SimpleModel) Forward(inputs *gotensor.Tensor) (*gotensor.Tensor, error) {
+	return m.Layer.Forward(inputs)
+}
+
+func (m *SimpleModel) Parameters() []*gotensor.Tensor {
+	return m.Layer.Parameters()
+}
+
+func (m *SimpleModel) ZeroGrad() {
+	m.Layer.ZeroGrad()
+}
+
+// must is a helper that panics on error.
+func must(t *gotensor.Tensor, err error) *gotensor.Tensor {
+	if err != nil {
+		panic(err)
+	}
+	return t
+}
+
+func main() {
+	fmt.Println("Gotensor Advanced Optimizer Example")
+
+	fmt.Println("Comparing the performance of different optimizers:")
+
+	// Training data (a simple linear regression problem).
+	trainInputs := []*gotensor.Tensor{
+		must(gotensor.NewTensor([]float64{1, 0}, []int{2})),
+		must(gotensor.NewTensor([]float64{0, 1}, []int{2})),
+		must(gotensor.NewTensor([]float64{1, 1}, []int{2})),
+		must(gotensor.NewTensor([]float64{0, 0}, []int{2})),
+	}
+
+	trainTargets := []*gotensor.Tensor{
+		must(gotensor.NewTensor([]float64{2, 0}, []int{2})),
+		must(gotensor.NewTensor([]float64{0, 2}, []int{2})),
+		must(gotensor.NewTensor([]float64{2, 2}, []int{2})),
+		must(gotensor.NewTensor([]float64{0, 0}, []int{2})),
+	}
+
+	// Loss function (MSE).
+	lossFn := func(output, target *gotensor.Tensor) *gotensor.Tensor {
+		// Mean squared error.
+		diff, _ := output.Sub(target)
+		squared, _ := diff.Mul(diff)
+		sum, _ := squared.Sum()
+		size := float64(output.Size())
+		result, _ := sum.DivScalar(size)
+		return result
+	}
+
+	// Test the SGD optimizer.
+	fmt.Println("\n1. Training with the SGD optimizer:")
+	sgdModel := &SimpleModel{
+		Layer: NewLinearLayer(2, 2),
+	}
+	sgdOptimizer := gotensor.NewSGD(sgdModel.Parameters(), 0.1)
+	sgdTrainer := gotensor.NewTrainer(sgdModel, sgdOptimizer)
+
+	sgdInitialLoss, _ := sgdTrainer.Evaluate(trainInputs, trainTargets, lossFn)
+	fmt.Printf("Initial loss: %.6f\n", sgdInitialLoss)
+
+	err := sgdTrainer.Train(trainInputs, trainTargets, 100, lossFn, false)
+	if err != nil {
+		fmt.Printf("Error during SGD training: %v\n", err)
+		return
+	}
+
+	sgdFinalLoss, _ := sgdTrainer.Evaluate(trainInputs, trainTargets, lossFn)
+	fmt.Printf("Final SGD loss: %.6f\n", sgdFinalLoss)
+
+	// Test the Adam optimizer.
+	fmt.Println("\n2. Training with the Adam optimizer:")
+	adamModel := &SimpleModel{
+		Layer: NewLinearLayer(2, 2),
+	}
+	adamOptimizer := gotensor.NewAdam(adamModel.Parameters(), 0.01, 0.9, 0.999, 1e-8)
+	adamTrainer := gotensor.NewTrainer(adamModel, adamOptimizer)
+
+	adamInitialLoss, _ := adamTrainer.Evaluate(trainInputs, trainTargets, lossFn)
+	fmt.Printf("Initial loss: %.6f\n", adamInitialLoss)
+
+	err = adamTrainer.Train(trainInputs, trainTargets, 100, lossFn, false)
+	if err != nil {
+		fmt.Printf("Error during Adam training: %v\n", err)
+		return
+	}
+
+	adamFinalLoss, _ := adamTrainer.Evaluate(trainInputs, trainTargets, lossFn)
+	fmt.Printf("Final Adam loss: %.6f\n", adamFinalLoss)
+
+	// Compare the predictions of the two trained models.
+	fmt.Println("\nComparing the predictions of the two models:")
+	testInput := must(gotensor.NewTensor([]float64{0.5, 0.5}, []int{2}))
+
+	sgdOutput, _ := sgdModel.Forward(testInput)
+	adamOutput, _ := adamModel.Forward(testInput)
+
+	sgdOut0, _ := sgdOutput.Data.Get(0)
+	sgdOut1, _ := sgdOutput.Data.Get(1)
+	adamOut0, _ := adamOutput.Data.Get(0)
+	adamOut1, _ := adamOutput.Data.Get(1)
+
+	fmt.Printf("Input: [0.5, 0.5]\n")
+	fmt.Printf("SGD output: [%.6f, %.6f]\n", sgdOut0, sgdOut1)
+	fmt.Printf("Adam output: [%.6f, %.6f]\n", adamOut0, adamOut1)
+
+	// Demonstrate driving an optimizer by hand.
+	fmt.Println("\n3. Using an optimizer manually:")
+	manualModel := &SimpleModel{
+		Layer: NewLinearLayer(2, 2),
+	}
+	manualOptimizer := gotensor.NewAdam(manualModel.Parameters(), 0.01, 0.9, 0.999, 1e-8)
+
+	// Run a few training steps by hand.
+	for step := 0; step < 5; step++ {
+		totalLoss := 0.0
+		for i := 0; i < len(trainInputs); i++ {
+			// Forward pass.
+			output, err := manualModel.Forward(trainInputs[i])
+			if err != nil {
+				fmt.Printf("Forward pass error: %v\n", err)
+				return
+			}
+
+			// Compute the loss.
+			loss := lossFn(output, trainTargets[i])
+			lossVal, _ := loss.Data.Get(0)
+			totalLoss += lossVal
+
+			// Backward pass.
+			loss.Backward()
+
+			// Update the parameters.
+			manualOptimizer.Step()
+
+			// Clear the gradients.
+			manualOptimizer.ZeroGrad()
+		}
+
+		avgLoss := totalLoss / float64(len(trainInputs))
+		fmt.Printf("Step %d, average loss: %.6f\n", step+1, avgLoss)
+	}
+
+	fmt.Println("\nOptimizer example complete!")
+}
diff --git a/examples/simple_model_example.go b/examples/simple_model_example.go
new file mode 100644
index 0000000..8017af3
--- /dev/null
+++ b/examples/simple_model_example.go
@@ -0,0 +1,186 @@
+package main
+
+import (
+	"fmt"
+
+	"git.kingecg.top/kingecg/gotensor"
+)
+
+// LinearLayer is a simple linear (fully connected) layer.
+type LinearLayer struct {
+	Weight *gotensor.Tensor
+	Bias   *gotensor.Tensor
+}
+
+// NewLinearLayer creates a new linear layer.
+// Note: the hard-coded initial values assume inputSize == outputSize == 2.
+func NewLinearLayer(inputSize, outputSize int) *LinearLayer {
+	weight, _ := gotensor.NewTensor([]float64{
+		0.5, 0.1,
+		0.2, 0.4,
+	}, []int{outputSize, inputSize})
+
+	bias, _ := gotensor.NewTensor([]float64{0, 0}, []int{outputSize})
+
+	return &LinearLayer{
+		Weight: weight,
+		Bias:   bias,
+	}
+}
+
+func (l *LinearLayer) Forward(inputs *gotensor.Tensor) (*gotensor.Tensor, error) {
+	// Linear transformation: output = inputs * weight^T + bias
+	// First transpose the weights.
+	weightTransposed, err := l.Weight.Data.Transpose()
+	if err != nil {
+		return nil, err
+	}
+
+	// Wrap the transposed weights in a tensor.
+	weightTransposedTensor := &gotensor.Tensor{
+		Data: weightTransposed,
+		Grad: must(gotensor.NewZeros(l.Weight.Shape())),
+	}
+
+	// Matrix multiplication.
+	mulResult, err := inputs.MatMul(weightTransposedTensor)
+	if err != nil {
+		return nil, err
+	}
+
+	// Add the bias.
+	output, err := mulResult.Add(l.Bias)
+	if err != nil {
+		return nil, err
+	}
+
+	return output, nil
+}
+
+func (l *LinearLayer) Parameters() []*gotensor.Tensor {
+	return []*gotensor.Tensor{l.Weight, l.Bias}
+}
+
+func (l *LinearLayer) ZeroGrad() {
+	l.Weight.ZeroGrad()
+	l.Bias.ZeroGrad()
+}
+
+// SimpleModel is a minimal model wrapping a single linear layer.
+type SimpleModel struct {
+	Layer *LinearLayer
+}
+
+func (m *SimpleModel) Forward(inputs *gotensor.Tensor) (*gotensor.Tensor, error) {
+	return m.Layer.Forward(inputs)
+}
+
+func (m *SimpleModel) Parameters() []*gotensor.Tensor {
+	return m.Layer.Parameters()
+}
+
+func (m *SimpleModel) ZeroGrad() {
+	m.Layer.ZeroGrad()
+}
+
+// must is a helper that panics on error.
+func must(t *gotensor.Tensor, err error) *gotensor.Tensor {
+	if err != nil {
+		panic(err)
+	}
+	return t
+}
+
+func main() {
+	fmt.Println("Gotensor Simple Model Example")
+
+	// Create the model.
+	model := &SimpleModel{
+		Layer: NewLinearLayer(2, 2), // 2 inputs, 2 outputs
+	}
+
+	// Create the optimizer (SGD).
+	optimizer := gotensor.NewSGD(model.Parameters(), 0.01)
+
+	// Create the trainer.
+	trainer := gotensor.NewTrainer(model, optimizer)
+
+	// Training data (the XOR problem). Note that XOR is not linearly
+	// separable, so a single linear layer can only approximate it.
+	trainInputs := []*gotensor.Tensor{
+		must(gotensor.NewTensor([]float64{0, 0}, []int{2})),
+		must(gotensor.NewTensor([]float64{0, 1}, []int{2})),
+		must(gotensor.NewTensor([]float64{1, 0}, []int{2})),
+		must(gotensor.NewTensor([]float64{1, 1}, []int{2})),
+	}
+
+	trainTargets := []*gotensor.Tensor{
+		must(gotensor.NewTensor([]float64{0, 1}, []int{2})), // 0 XOR 0 = 0 (encoded as [0, 1])
+		must(gotensor.NewTensor([]float64{1, 0}, []int{2})), // 0 XOR 1 = 1 (encoded as [1, 0])
+		must(gotensor.NewTensor([]float64{1, 0}, []int{2})), // 1 XOR 0 = 1 (encoded as [1, 0])
+		must(gotensor.NewTensor([]float64{0, 1}, []int{2})), // 1 XOR 1 = 0 (encoded as [0, 1])
+	}
+
+	// Loss function (MSE).
+	lossFn := func(output, target *gotensor.Tensor) *gotensor.Tensor {
+		// Mean squared error.
+		diff, _ := output.Sub(target)
+		squared, _ := diff.Mul(diff)
+		sum, _ := squared.Sum()
+		size := float64(output.Size())
+		result, _ := sum.DivScalar(size)
+		return result
+	}
+
+	fmt.Println("Starting training...")
+
+	// Train the model.
+	epochs := 100
+	err := trainer.Train(trainInputs, trainTargets, epochs, lossFn, true)
+	if err != nil {
+		fmt.Printf("Error during training: %v\n", err)
+		return
+	}
+
+	fmt.Println("Training complete!")
+
+	// Evaluate the model.
+	fmt.Println("\nEvaluating the trained model:")
+	for i, input := range trainInputs {
+		output, err := model.Forward(input)
+		if err != nil {
+			fmt.Printf("Forward pass error: %v\n", err)
+			continue
+		}
+
+		inputVal0, _ := input.Data.Get(0)
+		inputVal1, _ := input.Data.Get(1)
+		outputVal0, _ := output.Data.Get(0)
+		outputVal1, _ := output.Data.Get(1)
+		targetVal0, _ := trainTargets[i].Data.Get(0)
+		targetVal1, _ := trainTargets[i].Data.Get(1)
+
+		fmt.Printf("Input: [%.0f, %.0f] -> Output: [%.3f, %.3f], Target: [%.0f, %.0f]\n",
+			inputVal0, inputVal1, outputVal0, outputVal1,
+			targetVal0, targetVal1)
+	}
+
+	// Save the model.
+	err = gotensor.SaveModel(model, "/tmp/simple_model.json")
+	if err != nil {
+		fmt.Printf("Failed to save model: %v\n", err)
+	} else {
+		fmt.Println("Model saved to /tmp/simple_model.json")
+	}
+
+	// Load the model.
+	newModel := &SimpleModel{
+		Layer: NewLinearLayer(2, 2),
+	}
+	err = gotensor.LoadModel(newModel, "/tmp/simple_model.json")
+	if err != nil {
+		fmt.Printf("Failed to load model: %v\n", err)
+	} else {
+		fmt.Println("Model loaded from /tmp/simple_model.json")
+	}
+}
diff --git a/go.mod b/go.mod
index b7d7b4f..e1aaa88 100644
--- a/go.mod
+++ b/go.mod
@@ -2,4 +2,4 @@ module git.kingecg.top/kingecg/gotensor
 
 go 1.25.1
 
-require git.kingecg.top/kingecg/gomatrix v0.0.0-20251230141944-2ff4dcfb0fcd // indirect
+require git.kingecg.top/kingecg/gomatrix v0.0.0-20251231094846-bfcfba4e3f99
diff --git a/go.sum b/go.sum
index 976f0f5..3027aff 100644
--- a/go.sum
+++ b/go.sum
@@ -1,2 +1,6 @@
 git.kingecg.top/kingecg/gomatrix v0.0.0-20251230141944-2ff4dcfb0fcd h1:vn3LW38hQPGig0iqofIaIMYXVp3Uqb5QX6eH5B5lVxU=
 git.kingecg.top/kingecg/gomatrix v0.0.0-20251230141944-2ff4dcfb0fcd/go.mod h1:CHH1HkVvXrpsb+uDrsoyjx0lTwQ3oSSMbIRJmwvO6z8=
+git.kingecg.top/kingecg/gomatrix v0.0.0-20251231092627-f40960a855c7 h1:tutkcVKwpzNYxZRXkunhnkrDGRfMYgvwGAbCBCtO62c=
+git.kingecg.top/kingecg/gomatrix v0.0.0-20251231092627-f40960a855c7/go.mod h1:CHH1HkVvXrpsb+uDrsoyjx0lTwQ3oSSMbIRJmwvO6z8=
+git.kingecg.top/kingecg/gomatrix v0.0.0-20251231094846-bfcfba4e3f99 h1:sV3rEZIhYwU1TLmqFybT6Lwf6lA4oiITX/HC7i+JsiA=
+git.kingecg.top/kingecg/gomatrix v0.0.0-20251231094846-bfcfba4e3f99/go.mod h1:CHH1HkVvXrpsb+uDrsoyjx0lTwQ3oSSMbIRJmwvO6z8=
diff --git a/model_test.go b/model_test.go
new file mode 100644
index 0000000..bcf136f
--- /dev/null
+++ b/model_test.go
@@ -0,0 +1,218 @@
+package gotensor
+
+import (
+	"math"
+	"testing"
+
+	"git.kingecg.top/kingecg/gomatrix"
+)
+
+// Linear is a simple linear layer implementation used for testing.
+type Linear struct {
+	Weight *Tensor
+	Bias   *Tensor
+}
+
+// NewLinear creates a new linear layer.
+func NewLinear(weight, bias *Tensor) *Linear {
+	return &Linear{
+		Weight: weight,
+		Bias:   bias,
+	}
+}
+
+func (l *Linear) Forward(inputs *Tensor) (*Tensor, error) {
+	// Linear transformation: output = inputs * weight^T + bias
+	weightTransposed, err := l.Weight.Data.Transpose()
+	if err != nil {
+		return nil, err
+	}
+
+	mulResult, err := inputs.MatMul(&Tensor{Data: weightTransposed}) // wrap the raw matrix in a Tensor
+	if err != nil {
+		return nil, err
+	}
+
+	output, err := mulResult.Add(l.Bias)
+	if err != nil {
+		return nil, err
+	}
+
+	return output, nil
+}
+
+func (l *Linear) Parameters() []*Tensor {
+	return []*Tensor{l.Weight, l.Bias}
+}
+
+func (l *Linear) ZeroGrad() {
+	l.Weight.ZeroGrad()
+	l.Bias.ZeroGrad()
+}
+
+// TestSequential tests the basic functionality of the Sequential model.
+func TestSequential(t *testing.T) {
+	// Create a simple linear layer for the test.
+	weight, err := gomatrix.NewMatrix([]float64{1, 2, 3, 4}, []int{2, 2})
+	if err != nil {
+		t.Fatalf("Failed to create weight matrix: %v", err)
+	}
+	bias, err := gomatrix.NewMatrix([]float64{0.5, 0.5}, []int{2})
+	if err != nil {
+		t.Fatalf("Failed to create bias vector: %v", err)
+	}
+
+	linearLayer := &Linear{
+		Weight: &Tensor{
+			Data: weight,
+			Grad: Must(NewMatrix([][]float64{{0, 0}, {0, 0}})),
+		},
+		Bias: &Tensor{
+			Data: bias,
+			Grad: Must(NewVector([]float64{0, 0})),
+		},
+	}
+
+	// Create the Sequential model.
+	seq := &Sequential{
+		Layers: []Layer{linearLayer},
+	}
+
+	// Test the forward pass.
+	input := Must(NewVector([]float64{1, 1}))
+	output, err := seq.Forward(input)
+	if err != nil {
+		t.Errorf("Sequential forward failed: %v", err)
+	}
+
+	if output == nil {
+		t.Error("Expected output tensor, got nil")
+	}
+
+	// Test the Parameters method.
+	params := seq.Parameters()
+	if len(params) == 0 {
+		t.Error("Expected non-empty parameters list")
+	}
+
+	// Test the ZeroGrad method.
+	seq.ZeroGrad()
+	for _, param := range params {
+		// Check that every gradient entry has been zeroed.
+		shape := param.Shape()
+		for i := 0; i < param.Size(); i++ {
+			var gradVal float64
+			if len(shape) == 1 {
+				gradVal, _ = param.Grad.Get(i)
+			} else if len(shape) == 2 {
+				cols := shape[1]
+				gradVal, _ = param.Grad.Get(i/cols, i%cols)
+			}
+			if gradVal != 0 {
+				t.Errorf("Expected gradient to be zero, got %v", gradVal)
+			}
+		}
+	}
+}
+
+// TestSaveLoadModel tests saving and loading a model.
+func TestSaveLoadModel(t *testing.T) {
+	weight, err := NewMatrix([][]float64{{1, 2}, {3, 4}})
+	if err != nil {
+		t.Fatalf("Failed to create weight matrix: %v", err)
+	}
+	bias, err := NewVector([]float64{0.5, 0.5})
+	if err != nil {
+		t.Fatalf("Failed to create bias vector: %v", err)
+	}
+
+	linearLayer := &Linear{
+		Weight: &Tensor{
+			Data: weight,
+			Grad: Must(NewMatrix([][]float64{{0, 0}, {0, 0}})),
+		},
+		Bias: &Tensor{
+			Data: bias,
+			Grad: Must(NewVector([]float64{0, 0})),
+		},
+	}
+
+	model := &Sequential{
+		Layers: []Layer{linearLayer},
+	}
+
+	// Save the model.
+	filepath := "/tmp/test_model.json"
+	err = SaveModel(model, filepath)
+	if err != nil {
+		t.Errorf("SaveModel failed: %v", err)
+	}
+
+	// Sanity-check the original parameter value before loading.
+	weightVal, _ := linearLayer.Weight.Data.Get(0, 0)
+	if math.Abs(weightVal-1.0) > 1e-9 {
+		t.Errorf("Expected weight to be 1.0, got %v", weightVal)
+	}
+
+	// Load the model.
+	err = LoadModel(model, filepath)
+	if err != nil {
+		t.Errorf("LoadModel failed: %v", err)
+	}
+
+	// Verify that the parameter value survived the save/load round trip.
+	loadedWeight, _ := linearLayer.Weight.Data.Get(0, 0)
+	if math.Abs(loadedWeight-1.0) > 1e-9 {
+		t.Errorf("Expected loaded weight to be 1.0, got %v", loadedWeight)
+	}
+}
+
+// TestLinearLayer tests the linear layer.
+func TestLinearLayer(t *testing.T) {
+	weight := Must(NewMatrix([][]float64{{2, 0}, {0, 3}}))
+	bias := Must(NewVector([]float64{0.5, 0.5}))
+
+	layer := NewLinear(weight, bias)
+
+	// Test the forward pass.
+	input := Must(NewVector([]float64{1, 1}))
+	output, err := layer.Forward(input)
+	if err != nil {
+		t.Fatalf("Linear layer forward failed: %v", err)
+	}
+
+	// Expected output: weight * input + bias = [[2,0],[0,3]] * [1,1] + [0.5,0.5] = [2.5, 3.5]
+	got0, _ := output.Data.Get(0)
+	got1, _ := output.Data.Get(1)
+
+	if math.Abs(got0-2.5) > 1e-9 {
+		t.Errorf("Expected output[0] to be 2.5, got %v", got0)
+	}
+	if math.Abs(got1-3.5) > 1e-9 {
+		t.Errorf("Expected output[1] to be 3.5, got %v", got1)
+	}
+
+	// Test the Parameters method.
+	params := layer.Parameters()
+	if len(params) != 2 { // weight and bias
+		t.Errorf("Expected 2 parameters, got %d", len(params))
+	}
+
+	// Test the ZeroGrad method.
+	layer.ZeroGrad()
+	for _, param := range params {
+		shape := param.Shape()
+		for i := 0; i < param.Size(); i++ {
+			var gradVal float64
+			if len(shape) == 1 {
+				gradVal, _ = param.Grad.Get(i)
+			} else if len(shape) == 2 {
+				cols := shape[1]
+				gradVal, _ = param.Grad.Get(i/cols, i%cols)
+			}
+			if math.Abs(gradVal) > 1e-9 {
+				t.Errorf("Expected gradient to be zero, got %v", gradVal)
+			}
+		}
+	}
+}
diff --git a/optimizer_test.go b/optimizer_test.go
new file mode 100644
index 0000000..4f32cc9
--- /dev/null
+++ b/optimizer_test.go
@@ -0,0 +1,157 @@
+package gotensor
+
+import (
+	"math"
+	"testing"
+)
+
+// TestSGD tests the SGD optimizer.
+func TestSGD(t *testing.T) {
+	// Create some parameters for the test.
+	weightData, _ := NewMatrix([][]float64{{1.0, 2.0}, {3.0, 4.0}})
+	weightGrad, _ := NewMatrix([][]float64{{0.1, 0.2}, {0.3, 0.4}})
+
+	params := []*Tensor{
+		{
+			Data: Must(NewVector([]float64{1.0, 2.0, 3.0})),
+			Grad: Must(NewVector([]float64{0.1, 0.2, 0.3})),
+		},
+		{
+			Data: weightData,
+			Grad: weightGrad,
+		},
+	}
+
+	// Create the SGD optimizer.
+	lr := 0.1
+	sgd := NewSGD(params, lr)
+
+	// Record the original parameter values.
+	origVec0, _ := params[0].Data.Get(0)
+	origMat00, _ := params[1].Data.Get(0, 0)
+
+	// Run one optimization step.
+	sgd.Step()
+
+	// Check that the parameters were updated: p' = p - lr * grad.
+	newVec0, _ := params[0].Data.Get(0)
+	newMat00, _ := params[1].Data.Get(0, 0)
+
+	expectedVec0 := origVec0 - lr*0.1
+	expectedMat00 := origMat00 - lr*0.1
+
+	if math.Abs(newVec0-expectedVec0) > 1e-9 {
+		t.Errorf("Expected updated param[0][0] to be %v, got %v", expectedVec0, newVec0)
+	}
+
+	if math.Abs(newMat00-expectedMat00) > 1e-9 {
+		t.Errorf("Expected updated param[1][0,0] to be %v, got %v", expectedMat00, newMat00)
+	}
+
+	// Test ZeroGrad.
+	sgd.ZeroGrad()
+	for _, param := range params {
+		shape := param.Shape()
+		for i := 0; i < param.Size(); i++ {
+			var gradVal float64
+			if len(shape) == 1 {
+				gradVal, _ = param.Grad.Get(i)
+			} else if len(shape) == 2 {
+				cols := shape[1]
+				gradVal, _ = param.Grad.Get(i/cols, i%cols)
+			}
+			if math.Abs(gradVal) > 1e-9 {
+				t.Errorf("Expected gradient to be zero after ZeroGrad, got %v", gradVal)
+			}
+		}
+	}
+}
+
+// TestAdam tests the Adam optimizer.
+func TestAdam(t *testing.T) {
+	// Create some parameters for the test.
+	params := []*Tensor{
+		{
+			Data: Must(NewVector([]float64{1.0, 2.0})),
+			Grad: Must(NewVector([]float64{0.1, 0.2})),
+		},
+	}
+
+	// Create the Adam optimizer.
+	lr := 0.001
+	beta1 := 0.9
+	beta2 := 0.999
+	epsilon := 1e-8
+	adam := NewAdam(params, lr, beta1, beta2, epsilon)
+
+	// Record the original parameter value.
+	origVec0, _ := params[0].Data.Get(0)
+
+	// Run a few optimization steps.
+	for i := 0; i < 3; i++ {
+		adam.Step()
+	}
+
+	// Check that the parameter was updated.
+	newVec0, _ := params[0].Data.Get(0)
+
+	if math.Abs(newVec0-origVec0) < 1e-9 {
+		t.Errorf("Expected parameter to be updated, but it wasn't. Original: %v, New: %v", origVec0, newVec0)
+	}
+
+	// Verify that the internal state was created.
+	if len(adam.M) != len(params) || len(adam.V) != len(params) {
+		t.Error("Adam internal states M and V not properly initialized")
+	}
+
+	// Test ZeroGrad.
+	adam.ZeroGrad()
+	for _, param := range params {
+		shape := param.Shape()
+		for i := 0; i < param.Size(); i++ {
+			var gradVal float64
+			if len(shape) == 1 {
+				gradVal, _ = param.Grad.Get(i)
+			} else if len(shape) == 2 {
+				cols := shape[1]
+				gradVal, _ = param.Grad.Get(i/cols, i%cols)
+			}
+			if math.Abs(gradVal) > 1e-9 {
+				t.Errorf("Expected gradient to be zero after ZeroGrad, got %v", gradVal)
+			}
+		}
+	}
+}
+
+// TestAdamWithMatrix tests the Adam optimizer on matrix parameters.
+func TestAdamWithMatrix(t *testing.T) {
+	matrixData, _ := NewMatrix([][]float64{{1.0, 2.0}, {3.0, 4.0}})
+	matrixGrad, _ := NewMatrix([][]float64{{0.1, 0.2}, {0.3, 0.4}})
+
+	// Create a matrix parameter for the test.
+	params := []*Tensor{
+		{
+			Data: matrixData,
+			Grad: matrixGrad,
+		},
+	}
+
+	// Create the Adam optimizer.
+	lr := 0.001
+	adam := NewAdam(params, lr, 0.9, 0.999, 1e-8)
+
+	// Record the original parameter value.
+	origMat00, _ := params[0].Data.Get(0, 0)
+
+	// Run a few optimization steps.
+	for i := 0; i < 5; i++ {
+		adam.Step()
+	}
+
+	// Check that the parameter was updated.
+	newMat00, _ := params[0].Data.Get(0, 0)
+
+	if math.Abs(newMat00-origMat00) < 1e-9 {
+		t.Errorf("Expected parameter to be updated, but it wasn't. Original: %v, New: %v", origMat00, newMat00)
+	}
+}
diff --git a/trainer_test.go b/trainer_test.go
new file mode 100644
index 0000000..504fbfa
--- /dev/null
+++ b/trainer_test.go
@@ -0,0 +1,295 @@
+package gotensor
+
+import (
+	"testing"
+
+	"git.kingecg.top/kingecg/gomatrix"
+)
+
+// MockLayer is a mock layer used for testing.
+type MockLayer struct {
+	Weight *Tensor
+}
+
+// NewMockLayer creates a new mock layer.
+func NewMockLayer() *MockLayer {
+	weight, _ := gomatrix.NewMatrix([]float64{0.5, 0.3, 0.4, 0.7}, []int{2, 2})
+	grad, _ := gomatrix.NewZeros([]int{2, 2})
+
+	return &MockLayer{
+		Weight: &Tensor{
+			Data: weight,
+			Grad: grad,
+		},
+	}
+}
+
+func (m *MockLayer) Forward(inputs *Tensor) (*Tensor, error) {
+	// A plain matrix multiplication.
+	output, err := m.Weight.MatMul(inputs)
+	if err != nil {
+		return nil, err
+	}
+	return output, nil
+}
+
+func (m *MockLayer) Parameters() []*Tensor {
+	return []*Tensor{m.Weight}
+}
+
+func (m *MockLayer) ZeroGrad() {
+	m.Weight.ZeroGrad()
+}
+
+// MockModel is a mock model used for testing.
+type MockModel struct {
+	Layer *MockLayer
+}
+
+func (m *MockModel) Forward(inputs *Tensor) (*Tensor, error) {
+	return m.Layer.Forward(inputs)
+}
+
+func (m *MockModel) Parameters() []*Tensor {
+	return m.Layer.Parameters()
+}
+
+func (m *MockModel) ZeroGrad() {
+	m.Layer.ZeroGrad()
+}
+
+// NewVector creates a column vector tensor of shape [len(data), 1].
+func NewVector(data []float64) (*Tensor, error) {
+	return NewTensor(data, []int{len(data), 1})
+}
+
+// Must panics if err is non-nil and returns the tensor otherwise.
+func Must(t *Tensor, err error) *Tensor {
+	if err != nil {
+		panic(err)
+	}
+	return t
+}
+
+// TestTrainer tests the basic functionality of the trainer.
+func TestTrainer(t *testing.T) {
+	// Create the model.
+	mockLayer := NewMockLayer()
+	model := &MockModel{
+		Layer: mockLayer,
+	}
+
+	// Create the optimizer.
+	optimizer := NewSGD(model.Parameters(), 0.01)
+
+	// Create the trainer.
+	trainer := NewTrainer(model, optimizer)
+
+	// Create the training data.
+	inputs := []*Tensor{
+		Must(NewVector([]float64{1, 0})),
+		Must(NewVector([]float64{0, 1})),
+	}
+
+	targets := []*Tensor{
+		Must(NewVector([]float64{1, 0})),
+		Must(NewVector([]float64{0, 1})),
+	}
+
+	// Define the loss function.
+	lossFn := func(output, target *Tensor) *Tensor {
+		// MSE loss.
+		diff, _ := output.Data.Subtract(target.Data)
+		squared, _ := diff.Multiply(diff)
+
+		// Sum all matrix elements.
+		var total float64
+		shape := squared.Shape()
+		if len(shape) == 1 {
+			for i := 0; i < squared.Size(); i++ {
+				val, _ := squared.Get(i)
+				total += val
+			}
+		} else if len(shape) == 2 {
+			rows, cols := shape[0], shape[1]
+			for i := 0; i < rows; i++ {
+				for j := 0; j < cols; j++ {
+					val, _ := squared.Get(i, j)
+					total += val
+				}
+			}
+		} else {
+			// For higher-rank tensors, visit the elements in linear order.
+			for i := 0; i < squared.Size(); i++ {
+				val, err := squared.Get(i)
+				if err != nil {
+					continue
+				}
+				total += val
+			}
+		}
+
+		size := float64(output.Size())
+		resultVal := total / size
+
+		// Wrap the scalar result in a tensor.
+		result, _ := NewTensor([]float64{resultVal}, []int{1})
+		return result
+	}
+
+	// Test TrainEpoch.
+	avgLoss, err := trainer.TrainEpoch(inputs, targets, lossFn)
+	if err != nil {
+		t.Errorf("TrainEpoch failed: %v", err)
+	}
+
+	if avgLoss < 0 {
+		t.Errorf("Expected non-negative loss, got %v", avgLoss)
+	}
+
+	// Test Evaluate.
+	evalLoss, err := trainer.Evaluate(inputs, targets, lossFn)
+	if err != nil {
+		t.Errorf("Evaluate failed: %v", err)
+	}
+
+	if evalLoss < 0 {
+		t.Errorf("Expected non-negative evaluation loss, got %v", evalLoss)
+	}
+}
+
+// TestTrainerFullTrain tests a complete training run.
+func TestTrainerFullTrain(t *testing.T) {
+	// Create a simple linear regression model.
+	mockLayer := NewMockLayer()
+	model := &MockModel{
+		Layer: mockLayer,
+	}
+
+	// Create the optimizer.
+	optimizer := NewSGD(model.Parameters(), 0.1)
+
+	// Create the trainer.
+	trainer := NewTrainer(model, optimizer)
+
+	// Create the training data (a simple y = x identity mapping).
+	trainInputs := []*Tensor{
+		Must(NewVector([]float64{1, 0})),
+		Must(NewVector([]float64{0, 1})),
+		Must(NewVector([]float64{1, 1})),
+	}
+
+	trainTargets := []*Tensor{
+		Must(NewVector([]float64{1, 0})),
+		Must(NewVector([]float64{0, 1})),
+		Must(NewVector([]float64{1, 1})),
+	}
+
+	// Define the loss function.
+	lossFn := func(output, target *Tensor) *Tensor {
+		// MSE loss.
+		diff, _ := output.Data.Subtract(target.Data)
+		squared, _ := diff.Multiply(diff)
+		sum := squared.Sum()
+		size := float64(output.Size())
+		result := sum / size
+		return Must(NewTensor([]float64{result}, []int{1}))
+	}
+
+	// Train the model.
+	epochs := 5
+	err := trainer.Train(trainInputs, trainTargets, epochs, lossFn, false)
+	if err != nil {
+		t.Errorf("Full training failed: %v", err)
+	}
+
+	// Ideally the loss after training should be lower than before;
+	// here we only check that evaluation succeeds with a valid loss.
+	evalLoss, err := trainer.Evaluate(trainInputs, trainTargets, lossFn)
+	if err != nil {
+		t.Errorf("Post-training evaluation failed: %v", err)
+	}
+
+	if evalLoss < 0 {
+		t.Errorf("Expected non-negative evaluation loss after training, got %v", evalLoss)
+	}
+}
+
+// TestTrainerWithDifferentOptimizers tests the trainer with different optimizers.
+func TestTrainerWithDifferentOptimizers(t *testing.T) {
+	// Create the model.
+	mockLayer := NewMockLayer()
+	model := &MockModel{
+		Layer: mockLayer,
+	}
+
+	// Test the Adam optimizer.
+	adamOpt := NewAdam(model.Parameters(), 0.001, 0.9, 0.999, 1e-8)
+	trainer := NewTrainer(model, adamOpt)
+
+	// Create the training data.
+	inputs := []*Tensor{
+		Must(NewVector([]float64{1, 0})),
+		Must(NewVector([]float64{0, 1})),
+	}
+
+	targets := []*Tensor{
+		Must(NewVector([]float64{1, 0})),
+		Must(NewVector([]float64{0, 1})),
+	}
+
+	// Loss function.
+	lossFn := func(output, target *Tensor) *Tensor {
+		diff, _ := output.Data.Subtract(target.Data)
+		squared, _ := diff.Multiply(diff)
+
+		// Sum all matrix elements.
+		var total float64
+		shape := squared.Shape()
+		if len(shape) == 1 {
+			for i := 0; i < squared.Size(); i++ {
+				val, _ := squared.Get(i)
+				total += val
+			}
+		} else if len(shape) == 2 {
+			rows, cols := shape[0], shape[1]
+			for i := 0; i < rows; i++ {
+				for j := 0; j < cols; j++ {
+					val, _ := squared.Get(i, j)
+					total += val
+				}
+			}
+		} else {
+			// For higher-rank tensors, visit the elements in linear order.
+			for i := 0; i < squared.Size(); i++ {
+				val, err := squared.Get(i)
+				if err != nil {
+					continue
+				}
+				total += val
+			}
+		}
+
+		size := float64(output.Size())
+		resultVal := total / size
+
+		// Wrap the scalar result in a tensor.
+		result, _ := NewTensor([]float64{resultVal}, []int{1})
+		return result
+	}
+
+	// Train.
+	err := trainer.Train(inputs, targets, 3, lossFn, false)
+	if err != nil {
+		t.Errorf("Training with Adam optimizer failed: %v", err)
+	}
+
+	evalLoss, err := trainer.Evaluate(inputs, targets, lossFn)
+	if err != nil {
+		t.Errorf("Evaluation with Adam optimizer failed: %v", err)
+	}
+
+	if evalLoss < 0 {
+		t.Errorf("Expected non-negative evaluation loss with Adam, got %v", evalLoss)
+	}
+}