gotensor/convolution.go

314 lines
8.5 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package gotensor
import (
	"fmt"
	"math"

	"git.kingecg.top/kingecg/gomatrix"
)
// Conv2D 二维卷积操作
// Conv2D performs a 2D convolution of the input tensor with the given kernel.
//
// The input must have shape [batchSize, channels, height, width] and the
// kernel [channels, numFilters, kernelHeight, kernelWidth] — note the
// channel-first kernel layout, matching the kernel.Data.Get(c, f, kh, kw)
// access below. `padding` zero-pixels are conceptually added on every side
// (out-of-range positions simply contribute nothing to the sum) and the
// window advances by `stride` pixels.
//
// The result has shape [batchSize, numFilters, outH, outW]. The backward
// pass is not yet implemented, so no gradients flow through this op.
func (t *Tensor) Conv2D(kernel *Tensor, stride, padding int) (*Tensor, error) {
	inputShape := t.Data.Shape()
	kernelShape := kernel.Data.Shape()
	// Validate dimensionality and parameters up front so callers get a
	// diagnosable error instead of a later nil-pointer panic.
	if len(inputShape) != 4 || len(kernelShape) != 4 {
		return nil, fmt.Errorf("conv2d: expected 4D input and kernel, got %dD input and %dD kernel", len(inputShape), len(kernelShape))
	}
	if stride <= 0 {
		return nil, fmt.Errorf("conv2d: stride must be positive, got %d", stride)
	}
	if padding < 0 {
		return nil, fmt.Errorf("conv2d: padding must be non-negative, got %d", padding)
	}
	batchSize, inputChannels, inputHeight, inputWidth := inputShape[0], inputShape[1], inputShape[2], inputShape[3]
	kernelChannels, numFilters, kernelHeight, kernelWidth := kernelShape[0], kernelShape[1], kernelShape[2], kernelShape[3]
	if inputChannels != kernelChannels {
		return nil, fmt.Errorf("conv2d: input channels (%d) do not match kernel channels (%d)", inputChannels, kernelChannels)
	}
	// Standard convolution output-size formula.
	outputHeight := (inputHeight+2*padding-kernelHeight)/stride + 1
	outputWidth := (inputWidth+2*padding-kernelWidth)/stride + 1
	if outputHeight <= 0 || outputWidth <= 0 {
		return nil, fmt.Errorf("conv2d: kernel %dx%d does not fit padded input %dx%d", kernelHeight, kernelWidth, inputHeight+2*padding, inputWidth+2*padding)
	}
	// Allocate the flat output buffer; outputIdx walks it in row-major order,
	// matching the [b, f, oh, ow] loop nesting below.
	outputData := make([]float64, batchSize*numFilters*outputHeight*outputWidth)
	outputShape := []int{batchSize, numFilters, outputHeight, outputWidth}
	outputIdx := 0
	for b := 0; b < batchSize; b++ {
		for f := 0; f < numFilters; f++ {
			for oh := 0; oh < outputHeight; oh++ {
				for ow := 0; ow < outputWidth; ow++ {
					// Dot product of the kernel with the receptive field.
					sum := 0.0
					for c := 0; c < inputChannels; c++ {
						for kh := 0; kh < kernelHeight; kh++ {
							for kw := 0; kw < kernelWidth; kw++ {
								ih := oh*stride - padding + kh
								iw := ow*stride - padding + kw
								// Positions in the zero-padding contribute 0, so skip them.
								if ih >= 0 && ih < inputHeight && iw >= 0 && iw < inputWidth {
									inputVal, _ := t.Data.Get(b, c, ih, iw)
									kernelVal, _ := kernel.Data.Get(c, f, kh, kw)
									sum += inputVal * kernelVal
								}
							}
						}
					}
					outputData[outputIdx] = sum
					outputIdx++
				}
			}
		}
	}
	outputMatrix, err := gomatrix.NewMatrix(outputData, outputShape)
	if err != nil {
		return nil, err
	}
	output := &Tensor{
		Data: outputMatrix,
		Op:   "conv2d",
	}
	output.Prevs[0] = t
	output.Prevs[1] = kernel
	output.Num_Prevs = 2
	output.Args[0] = stride
	output.Args[1] = padding
	output.backwardFunc = func() {
		if t.Grad != nil {
			// TODO: backward pass for the input (full correlation with the
			// kernel) is not implemented yet.
		}
		if kernel.Grad != nil {
			// TODO: kernel gradient (cross-correlation of input with the
			// output gradient) is not implemented yet.
		}
	}
	return output, nil
}
// MaxPool2D 二维最大池化操作
// MaxPool2D performs 2D max pooling over an input of shape
// [batchSize, channels, height, width] with a square kernelSize window
// moved by `stride`, and no padding.
//
// The result has shape [batchSize, channels, outH, outW]. The position of
// each window maximum is recorded so the backward pass can route the output
// gradient exclusively to that element.
func (t *Tensor) MaxPool2D(kernelSize, stride int) (*Tensor, error) {
	inputShape := t.Data.Shape()
	if len(inputShape) != 4 {
		return nil, fmt.Errorf("maxpool2d: expected 4D input, got %dD", len(inputShape))
	}
	if kernelSize <= 0 || stride <= 0 {
		return nil, fmt.Errorf("maxpool2d: kernelSize (%d) and stride (%d) must be positive", kernelSize, stride)
	}
	batchSize, inputChannels, inputHeight, inputWidth := inputShape[0], inputShape[1], inputShape[2], inputShape[3]
	// Output size without padding.
	outputHeight := (inputHeight-kernelSize)/stride + 1
	outputWidth := (inputWidth-kernelSize)/stride + 1
	if outputHeight <= 0 || outputWidth <= 0 {
		return nil, fmt.Errorf("maxpool2d: kernel size %d does not fit input %dx%d", kernelSize, inputHeight, inputWidth)
	}
	outputData := make([]float64, batchSize*inputChannels*outputHeight*outputWidth)
	outputShape := []int{batchSize, inputChannels, outputHeight, outputWidth}
	// Flat input index of each window's maximum, for the backward pass.
	maxIndices := make([]int, len(outputData))
	outputIdx := 0
	for b := 0; b < batchSize; b++ {
		for c := 0; c < inputChannels; c++ {
			for oh := 0; oh < outputHeight; oh++ {
				for ow := 0; ow < outputWidth; ow++ {
					startH := oh * stride
					startW := ow * stride
					// -Inf is the identity for max: correct even when every
					// input value is an arbitrarily large negative number.
					maxVal := math.Inf(-1)
					maxIH, maxIW := -1, -1
					for kh := 0; kh < kernelSize; kh++ {
						for kw := 0; kw < kernelSize; kw++ {
							ih := startH + kh
							iw := startW + kw
							if ih < inputHeight && iw < inputWidth {
								inputVal, _ := t.Data.Get(b, c, ih, iw)
								if inputVal > maxVal {
									maxVal = inputVal
									maxIH = ih
									maxIW = iw
								}
							}
						}
					}
					outputData[outputIdx] = maxVal
					// Row-major flat index into the input tensor.
					maxIndices[outputIdx] = ((b*inputChannels+c)*inputHeight+maxIH)*inputWidth + maxIW
					outputIdx++
				}
			}
		}
	}
	outputMatrix, err := gomatrix.NewMatrix(outputData, outputShape)
	if err != nil {
		return nil, err
	}
	output := &Tensor{
		Data: outputMatrix,
		Op:   "maxpool2d",
	}
	output.Prevs[0] = t
	output.Num_Prevs = 1
	output.Args[0] = kernelSize
	output.Args[1] = stride
	output.backwardFunc = func() {
		if t.Grad != nil {
			// Backward pass: each output gradient flows only to the input
			// element that produced the window maximum.
			outputIdx := 0
			for b := 0; b < batchSize; b++ {
				for c := 0; c < inputChannels; c++ {
					for oh := 0; oh < outputHeight; oh++ {
						for ow := 0; ow < outputWidth; ow++ {
							inIdx := maxIndices[outputIdx]
							outputGrad, _ := output.Grad.Get(b, c, oh, ow)
							// Decompose the stored flat index back into
							// [b, c, h, w] coordinates.
							b_idx := inIdx / (inputChannels * inputHeight * inputWidth)
							remaining := inIdx % (inputChannels * inputHeight * inputWidth)
							c_idx := remaining / (inputHeight * inputWidth)
							remaining = remaining % (inputHeight * inputWidth)
							h_idx := remaining / inputWidth
							w_idx := remaining % inputWidth
							// Accumulate (windows may overlap when stride < kernelSize).
							currentGrad, _ := t.Grad.Get(b_idx, c_idx, h_idx, w_idx)
							t.Grad.Set(currentGrad+outputGrad, b_idx, c_idx, h_idx, w_idx)
							outputIdx++
						}
					}
				}
			}
		}
	}
	return output, nil
}
// AvgPool2D 二维平均池化操作
// AvgPool2D performs 2D average pooling over an input of shape
// [batchSize, channels, height, width] with a square kernelSize window
// moved by `stride`, and no padding.
//
// The result has shape [batchSize, channels, outH, outW]. With no padding
// every window lies fully inside the input, so each window averages exactly
// kernelSize*kernelSize elements — the backward pass relies on this when it
// divides the output gradient by kernelSize².
func (t *Tensor) AvgPool2D(kernelSize, stride int) (*Tensor, error) {
	inputShape := t.Data.Shape()
	if len(inputShape) != 4 {
		return nil, fmt.Errorf("avgpool2d: expected 4D input, got %dD", len(inputShape))
	}
	if kernelSize <= 0 || stride <= 0 {
		return nil, fmt.Errorf("avgpool2d: kernelSize (%d) and stride (%d) must be positive", kernelSize, stride)
	}
	batchSize, inputChannels, inputHeight, inputWidth := inputShape[0], inputShape[1], inputShape[2], inputShape[3]
	// Output size without padding.
	outputHeight := (inputHeight-kernelSize)/stride + 1
	outputWidth := (inputWidth-kernelSize)/stride + 1
	if outputHeight <= 0 || outputWidth <= 0 {
		return nil, fmt.Errorf("avgpool2d: kernel size %d does not fit input %dx%d", kernelSize, inputHeight, inputWidth)
	}
	outputData := make([]float64, batchSize*inputChannels*outputHeight*outputWidth)
	outputShape := []int{batchSize, inputChannels, outputHeight, outputWidth}
	outputIdx := 0
	for b := 0; b < batchSize; b++ {
		for c := 0; c < inputChannels; c++ {
			for oh := 0; oh < outputHeight; oh++ {
				for ow := 0; ow < outputWidth; ow++ {
					startH := oh * stride
					startW := ow * stride
					sum := 0.0
					count := 0
					// Average over the pooling window. The bounds check is
					// defensive: without padding the window never leaves the
					// input, so count always ends up kernelSize*kernelSize.
					for kh := 0; kh < kernelSize; kh++ {
						for kw := 0; kw < kernelSize; kw++ {
							ih := startH + kh
							iw := startW + kw
							if ih < inputHeight && iw < inputWidth {
								inputVal, _ := t.Data.Get(b, c, ih, iw)
								sum += inputVal
								count++
							}
						}
					}
					outputData[outputIdx] = sum / float64(count)
					outputIdx++
				}
			}
		}
	}
	outputMatrix, err := gomatrix.NewMatrix(outputData, outputShape)
	if err != nil {
		return nil, err
	}
	output := &Tensor{
		Data: outputMatrix,
		Op:   "avgpool2d",
	}
	output.Prevs[0] = t
	output.Num_Prevs = 1
	output.Args[0] = kernelSize
	output.Args[1] = stride
	output.backwardFunc = func() {
		if t.Grad != nil {
			// Backward pass: the output gradient is shared equally by every
			// element of its pooling window.
			for b := 0; b < batchSize; b++ {
				for c := 0; c < inputChannels; c++ {
					for oh := 0; oh < outputHeight; oh++ {
						for ow := 0; ow < outputWidth; ow++ {
							startH := oh * stride
							startW := ow * stride
							outputGrad, _ := output.Grad.Get(b, c, oh, ow)
							avgGrad := outputGrad / float64(kernelSize*kernelSize)
							for kh := 0; kh < kernelSize; kh++ {
								for kw := 0; kw < kernelSize; kw++ {
									ih := startH + kh
									iw := startW + kw
									if ih < inputHeight && iw < inputWidth {
										// Accumulate: overlapping windows
										// (stride < kernelSize) each add their share.
										currentGrad, _ := t.Grad.Get(b, c, ih, iw)
										t.Grad.Set(currentGrad+avgGrad, b, c, ih, iw)
									}
								}
							}
						}
					}
				}
			}
		}
	}
	return output, nil
}