package gotensor

import (
	"fmt"
	"math"

	"git.kingecg.top/kingecg/gomatrix"
)

// Conv2D performs a 2D convolution of t with the given kernel.
func (t *Tensor) Conv2D(kernel *Tensor, stride, padding int) (*Tensor, error) {
	// Input is assumed to be [batch_size, channels, height, width];
	// the kernel is assumed to be [in_channels, num_filters, height, width].
	inputShape := t.Data.Shape()
	kernelShape := kernel.Data.Shape()

	// Validate dimensions.
	if len(inputShape) != 4 || len(kernelShape) != 4 {
		return nil, fmt.Errorf("conv2d: expected 4D input and kernel, got %dD and %dD", len(inputShape), len(kernelShape))
	}

	batchSize, inputChannels, inputHeight, inputWidth := inputShape[0], inputShape[1], inputShape[2], inputShape[3]
	kernelChannels, numFilters, kernelHeight, kernelWidth := kernelShape[0], kernelShape[1], kernelShape[2], kernelShape[3]

	// The input and kernel channel counts must match.
	if inputChannels != kernelChannels {
		return nil, fmt.Errorf("conv2d: input has %d channels but kernel expects %d", inputChannels, kernelChannels)
	}

	// Compute the output dimensions.
	outputHeight := (inputHeight+2*padding-kernelHeight)/stride + 1
	outputWidth := (inputWidth+2*padding-kernelWidth)/stride + 1

	// Allocate the output buffer.
	outputSize := batchSize * numFilters * outputHeight * outputWidth
	outputData := make([]float64, outputSize)
	outputShape := []int{batchSize, numFilters, outputHeight, outputWidth}

	// Perform the convolution.
	outputIdx := 0
	for b := 0; b < batchSize; b++ { // batch
		for f := 0; f < numFilters; f++ { // filter
			for oh := 0; oh < outputHeight; oh++ { // output height
				for ow := 0; ow < outputWidth; ow++ { // output width
					// Accumulate the dot product over channels and the kernel window.
					sum := 0.0
					for c := 0; c < inputChannels; c++ { // channel
						for kh := 0; kh < kernelHeight; kh++ { // kernel height
							for kw := 0; kw < kernelWidth; kw++ { // kernel width
								ih := oh*stride - padding + kh
								iw := ow*stride - padding + kw
								if ih >= 0 && ih < inputHeight && iw >= 0 && iw < inputWidth {
									inputVal, _ := t.Data.Get(b, c, ih, iw)
									kernelVal, _ := kernel.Data.Get(c, f, kh, kw)
									sum += inputVal * kernelVal
								}
							}
						}
					}
					outputData[outputIdx] = sum
					outputIdx++
				}
			}
		}
	}

	outputMatrix, err := gomatrix.NewMatrix(outputData, outputShape)
	if err != nil {
		return nil, err
	}

	output := &Tensor{
		Data: outputMatrix,
		Op:   "conv2d",
	}
	output.Prevs[0] = t
	output.Prevs[1] = kernel
	output.Num_Prevs = 2
	output.Args[0] = stride
	output.Args[1] = padding

	output.backwardFunc = func() {
		if t.Grad != nil {
			// Backward pass for the input gradient is not implemented yet;
			// see the conv2DBackward sketch at the end of this file.
		}
		if kernel.Grad != nil {
			// Backward pass for the kernel gradient is not implemented yet;
			// see the conv2DBackward sketch at the end of this file.
		}
	}

	return output, nil
}

// MaxPool2D performs a 2D max pooling over t.
func (t *Tensor) MaxPool2D(kernelSize, stride int) (*Tensor, error) {
	// Input is assumed to be [batch_size, channels, height, width].
	inputShape := t.Data.Shape()
	if len(inputShape) != 4 {
		return nil, fmt.Errorf("maxpool2d: expected 4D input, got %dD", len(inputShape))
	}

	batchSize, inputChannels, inputHeight, inputWidth := inputShape[0], inputShape[1], inputShape[2], inputShape[3]

	// Compute the output dimensions (no padding).
	outputHeight := (inputHeight-kernelSize)/stride + 1
	outputWidth := (inputWidth-kernelSize)/stride + 1

	// Allocate the output buffer.
	outputSize := batchSize * inputChannels * outputHeight * outputWidth
	outputData := make([]float64, outputSize)
	outputShape := []int{batchSize, inputChannels, outputHeight, outputWidth}

	// Record the argmax positions for the backward pass.
	maxIndices := make([]int, len(outputData))

	// Perform the max pooling.
	outputIdx := 0
	for b := 0; b < batchSize; b++ {
		for c := 0; c < inputChannels; c++ {
			for oh := 0; oh < outputHeight; oh++ {
				for ow := 0; ow < outputWidth; ow++ {
					startH := oh * stride
					startW := ow * stride

					maxVal := math.Inf(-1) // below any input value
					maxIH, maxIW := -1, -1 // position of the maximum

					// Find the maximum within the pooling window.
					for kh := 0; kh < kernelSize; kh++ {
						for kw := 0; kw < kernelSize; kw++ {
							ih := startH + kh
							iw := startW + kw
							if ih < inputHeight && iw < inputWidth {
								inputVal, _ := t.Data.Get(b, c, ih, iw)
								if inputVal > maxVal {
									maxVal = inputVal
									maxIH = ih
									maxIW = iw
								}
							}
						}
					}

					outputData[outputIdx] = maxVal
					// Flat index of the maximum within the input array.
					maxIndices[outputIdx] = ((b*inputChannels+c)*inputHeight+maxIH)*inputWidth + maxIW
					outputIdx++
				}
			}
		}
	}

	outputMatrix, err := gomatrix.NewMatrix(outputData, outputShape)
	if err != nil {
		return nil, err
	}

	output := &Tensor{
		Data: outputMatrix,
		Op:   "maxpool2d",
	}
	output.Prevs[0] = t
	output.Num_Prevs = 1
	output.Args[0] = kernelSize
	output.Args[1] = stride

	output.backwardFunc = func() {
		if t.Grad != nil {
			// Backward pass: route each output gradient only to the input
			// position that held the maximum.
			outputIdx := 0
			for b := 0; b < batchSize; b++ {
				for c := 0; c < inputChannels; c++ {
					for oh := 0; oh < outputHeight; oh++ {
						for ow := 0; ow < outputWidth; ow++ {
							// Flat input index of the maximum recorded in the forward pass.
							inIdx := maxIndices[outputIdx]
							// Upstream gradient for this output element.
							outputGrad, _ := output.Grad.Get(b, c, oh, ow)

							// Convert the flat index back to 4D coordinates
							// and accumulate the gradient there.
							bIdx := inIdx / (inputChannels * inputHeight * inputWidth)
							remaining := inIdx % (inputChannels * inputHeight * inputWidth)
							cIdx := remaining / (inputHeight * inputWidth)
							remaining = remaining % (inputHeight * inputWidth)
							hIdx := remaining / inputWidth
							wIdx := remaining % inputWidth

							currentGrad, _ := t.Grad.Get(bIdx, cIdx, hIdx, wIdx)
							newGrad := currentGrad + outputGrad
							t.Grad.Set(newGrad, bIdx, cIdx, hIdx, wIdx)

							outputIdx++
						}
					}
				}
			}
		}
	}

	return output, nil
}

// AvgPool2D performs a 2D average pooling over t.
func (t *Tensor) AvgPool2D(kernelSize, stride int) (*Tensor, error) {
	// Input is assumed to be [batch_size, channels, height, width].
	inputShape := t.Data.Shape()
	if len(inputShape) != 4 {
		return nil, fmt.Errorf("avgpool2d: expected 4D input, got %dD", len(inputShape))
	}

	batchSize, inputChannels, inputHeight, inputWidth := inputShape[0], inputShape[1], inputShape[2], inputShape[3]

	// Compute the output dimensions (no padding).
	outputHeight := (inputHeight-kernelSize)/stride + 1
	outputWidth := (inputWidth-kernelSize)/stride + 1

	// Allocate the output buffer.
	outputSize := batchSize * inputChannels * outputHeight * outputWidth
	outputData := make([]float64, outputSize)
	outputShape := []int{batchSize, inputChannels, outputHeight, outputWidth}

	// Perform the average pooling.
	outputIdx := 0
	for b := 0; b < batchSize; b++ {
		for c := 0; c < inputChannels; c++ {
			for oh := 0; oh < outputHeight; oh++ {
				for ow := 0; ow < outputWidth; ow++ {
					startH := oh * stride
					startW := ow * stride

					sum := 0.0
					count := 0

					// Average over the pooling window.
					for kh := 0; kh < kernelSize; kh++ {
						for kw := 0; kw < kernelSize; kw++ {
							ih := startH + kh
							iw := startW + kw
							if ih < inputHeight && iw < inputWidth {
								inputVal, _ := t.Data.Get(b, c, ih, iw)
								sum += inputVal
								count++
							}
						}
					}

					outputData[outputIdx] = sum / float64(count)
					outputIdx++
				}
			}
		}
	}

	outputMatrix, err := gomatrix.NewMatrix(outputData, outputShape)
	if err != nil {
		return nil, err
	}

	output := &Tensor{
		Data: outputMatrix,
		Op:   "avgpool2d",
	}
	output.Prevs[0] = t
	output.Num_Prevs = 1
	output.Args[0] = kernelSize
	output.Args[1] = stride

	output.backwardFunc = func() {
		if t.Grad != nil {
			// Backward pass: distribute each output gradient evenly over its
			// pooling window. With the output-size formula above, windows never
			// cross the input boundary, so every window holds exactly
			// kernelSize*kernelSize elements.
			for b := 0; b < batchSize; b++ {
				for c := 0; c < inputChannels; c++ {
					for oh := 0; oh < outputHeight; oh++ {
						for ow := 0; ow < outputWidth; ow++ {
							startH := oh * stride
							startW := ow * stride

							outputGrad, _ := output.Grad.Get(b, c, oh, ow)
							avgGrad := outputGrad / float64(kernelSize*kernelSize)

							// Accumulate the share into each input position in the window.
							for kh := 0; kh < kernelSize; kh++ {
								for kw := 0; kw < kernelSize; kw++ {
									ih := startH + kh
									iw := startW + kw
									if ih < inputHeight && iw < inputWidth {
										currentGrad, _ := t.Grad.Get(b, c, ih, iw)
										newGrad := currentGrad + avgGrad
										t.Grad.Set(newGrad, b, c, ih, iw)
									}
								}
							}
						}
					}
				}
			}
		}
	}

	return output, nil
}
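
// conv2DBackward is a minimal sketch of the backward pass that Conv2D's
// backwardFunc leaves unimplemented. It is NOT wired into Conv2D above: the
// name conv2DBackward is ours, and it assumes that t.Grad, kernel.Grad and
// output.Grad have already been allocated with the same shapes as the
// corresponding Data matrices (this file never shows how Grad is allocated).
// It walks the same index space as the forward loop and accumulates
//
//	dInput[b,c,ih,iw]  += dOutput[b,f,oh,ow] * kernel[c,f,kh,kw]
//	dKernel[c,f,kh,kw] += dOutput[b,f,oh,ow] * input[b,c,ih,iw]
func conv2DBackward(t, kernel, output *Tensor, stride, padding int) {
	inputShape := t.Data.Shape()
	kernelShape := kernel.Data.Shape()
	outputShape := output.Data.Shape()

	batchSize, inputChannels, inputHeight, inputWidth := inputShape[0], inputShape[1], inputShape[2], inputShape[3]
	numFilters, kernelHeight, kernelWidth := kernelShape[1], kernelShape[2], kernelShape[3]
	outputHeight, outputWidth := outputShape[2], outputShape[3]

	for b := 0; b < batchSize; b++ { // batch
		for f := 0; f < numFilters; f++ { // filter
			for oh := 0; oh < outputHeight; oh++ { // output height
				for ow := 0; ow < outputWidth; ow++ { // output width
					grad, _ := output.Grad.Get(b, f, oh, ow)
					for c := 0; c < inputChannels; c++ { // channel
						for kh := 0; kh < kernelHeight; kh++ { // kernel height
							for kw := 0; kw < kernelWidth; kw++ { // kernel width
								ih := oh*stride - padding + kh
								iw := ow*stride - padding + kw
								if ih < 0 || ih >= inputHeight || iw < 0 || iw >= inputWidth {
									continue // positions in the padding contribute nothing
								}
								if t.Grad != nil {
									// Input gradient: kernel weight scaled by the upstream gradient.
									kernelVal, _ := kernel.Data.Get(c, f, kh, kw)
									cur, _ := t.Grad.Get(b, c, ih, iw)
									t.Grad.Set(cur+grad*kernelVal, b, c, ih, iw)
								}
								if kernel.Grad != nil {
									// Kernel gradient: input value scaled by the upstream gradient.
									inputVal, _ := t.Data.Get(b, c, ih, iw)
									cur, _ := kernel.Grad.Get(c, f, kh, kw)
									kernel.Grad.Set(cur+grad*inputVal, c, f, kh, kw)
								}
							}
						}
					}
				}
			}
		}
	}
}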