package gotensor

import (
	"fmt"
	"math"

	"git.kingecg.top/kingecg/gomatrix"
)

// Conv2D performs a 2D convolution of t with the given kernel.
func (t *Tensor) Conv2D(kernel *Tensor, stride, padding int) (*Tensor, error) {
	// Input is assumed to be [batch_size, channels, height, width];
	// the kernel is assumed to be [in_channels, num_filters, height, width].
	inputShape := t.Data.Shape()
	kernelShape := kernel.Data.Shape()

	// Validate dimensions.
	if len(inputShape) != 4 || len(kernelShape) != 4 {
		return nil, fmt.Errorf("conv2d: expected 4D input and kernel, got %dD and %dD", len(inputShape), len(kernelShape))
	}

	batchSize, inputChannels, inputHeight, inputWidth := inputShape[0], inputShape[1], inputShape[2], inputShape[3]
	kernelChannels, numFilters, kernelHeight, kernelWidth := kernelShape[0], kernelShape[1], kernelShape[2], kernelShape[3]

	// The input and kernel channel counts must match.
	if inputChannels != kernelChannels {
		return nil, fmt.Errorf("conv2d: input has %d channels but kernel expects %d", inputChannels, kernelChannels)
	}

	// Compute the output dimensions.
	outputHeight := (inputHeight+2*padding-kernelHeight)/stride + 1
	outputWidth := (inputWidth+2*padding-kernelWidth)/stride + 1

	// Allocate the output buffer.
	outputSize := batchSize * numFilters * outputHeight * outputWidth
	outputData := make([]float64, outputSize)
	outputShape := []int{batchSize, numFilters, outputHeight, outputWidth}

	// Perform the convolution.
	outputIdx := 0
	for b := 0; b < batchSize; b++ { // batch
		for f := 0; f < numFilters; f++ { // filter
			for oh := 0; oh < outputHeight; oh++ { // output height
				for ow := 0; ow < outputWidth; ow++ { // output width
					// Accumulate the dot product over channels and the kernel window.
					sum := 0.0
					for c := 0; c < inputChannels; c++ { // channel
						for kh := 0; kh < kernelHeight; kh++ { // kernel height
							for kw := 0; kw < kernelWidth; kw++ { // kernel width
								ih := oh*stride - padding + kh
								iw := ow*stride - padding + kw
								if ih >= 0 && ih < inputHeight && iw >= 0 && iw < inputWidth {
									inputVal, _ := t.Data.Get(b, c, ih, iw)
									kernelVal, _ := kernel.Data.Get(c, f, kh, kw)
									sum += inputVal * kernelVal
								}
							}
						}
					}
					outputData[outputIdx] = sum
					outputIdx++
				}
			}
		}
	}

	outputMatrix, err := gomatrix.NewMatrix(outputData, outputShape)
	if err != nil {
		return nil, err
	}

	output := &Tensor{
		Data: outputMatrix,
		Op:   "conv2d",
	}
	output.Prevs[0] = t
	output.Prevs[1] = kernel
	output.Num_Prevs = 2
	output.Args[0] = stride
	output.Args[1] = padding

	output.backwardFunc = func() {
		if t.Grad != nil {
			// Backward pass for the input gradient is not implemented yet;
			// see the conv2DBackward sketch at the end of this file.
		}
		if kernel.Grad != nil {
			// Backward pass for the kernel gradient is not implemented yet;
			// see the conv2DBackward sketch at the end of this file.
		}
	}

	return output, nil
}

// MaxPool2D performs a 2D max pooling over t.
func (t *Tensor) MaxPool2D(kernelSize, stride int) (*Tensor, error) {
	// Input is assumed to be [batch_size, channels, height, width].
	inputShape := t.Data.Shape()
	if len(inputShape) != 4 {
		return nil, fmt.Errorf("maxpool2d: expected 4D input, got %dD", len(inputShape))
	}

	batchSize, inputChannels, inputHeight, inputWidth := inputShape[0], inputShape[1], inputShape[2], inputShape[3]

	// Compute the output dimensions (no padding).
	outputHeight := (inputHeight-kernelSize)/stride + 1
	outputWidth := (inputWidth-kernelSize)/stride + 1

	// Allocate the output buffer.
	outputSize := batchSize * inputChannels * outputHeight * outputWidth
	outputData := make([]float64, outputSize)
	outputShape := []int{batchSize, inputChannels, outputHeight, outputWidth}

	// Record the argmax positions for the backward pass.
	maxIndices := make([]int, len(outputData))

	// Perform the max pooling.
	outputIdx := 0
	for b := 0; b < batchSize; b++ {
		for c := 0; c < inputChannels; c++ {
			for oh := 0; oh < outputHeight; oh++ {
				for ow := 0; ow < outputWidth; ow++ {
					startH := oh * stride
					startW := ow * stride

					maxVal := math.Inf(-1) // below any input value
					maxIH, maxIW := -1, -1 // position of the maximum

					// Find the maximum within the pooling window.
					for kh := 0; kh < kernelSize; kh++ {
						for kw := 0; kw < kernelSize; kw++ {
							ih := startH + kh
							iw := startW + kw
							if ih < inputHeight && iw < inputWidth {
								inputVal, _ := t.Data.Get(b, c, ih, iw)
								if inputVal > maxVal {
									maxVal = inputVal
									maxIH = ih
									maxIW = iw
								}
							}
						}
					}

					outputData[outputIdx] = maxVal
					// Flat index of the maximum within the input array.
					maxIndices[outputIdx] = ((b*inputChannels+c)*inputHeight+maxIH)*inputWidth + maxIW
					outputIdx++
				}
			}
		}
	}

	outputMatrix, err := gomatrix.NewMatrix(outputData, outputShape)
	if err != nil {
		return nil, err
	}

	output := &Tensor{
		Data: outputMatrix,
		Op:   "maxpool2d",
	}
	output.Prevs[0] = t
	output.Num_Prevs = 1
	output.Args[0] = kernelSize
	output.Args[1] = stride

	output.backwardFunc = func() {
		if t.Grad != nil {
			// Backward pass: route each output gradient only to the input
			// position that held the maximum.
			outputIdx := 0
			for b := 0; b < batchSize; b++ {
				for c := 0; c < inputChannels; c++ {
					for oh := 0; oh < outputHeight; oh++ {
						for ow := 0; ow < outputWidth; ow++ {
							// Flat input index of the maximum recorded in the forward pass.
							inIdx := maxIndices[outputIdx]
							// Upstream gradient for this output element.
							outputGrad, _ := output.Grad.Get(b, c, oh, ow)

							// Convert the flat index back to 4D coordinates
							// and accumulate the gradient there.
							bIdx := inIdx / (inputChannels * inputHeight * inputWidth)
							remaining := inIdx % (inputChannels * inputHeight * inputWidth)
							cIdx := remaining / (inputHeight * inputWidth)
							remaining = remaining % (inputHeight * inputWidth)
							hIdx := remaining / inputWidth
							wIdx := remaining % inputWidth

							currentGrad, _ := t.Grad.Get(bIdx, cIdx, hIdx, wIdx)
							newGrad := currentGrad + outputGrad
							t.Grad.Set(newGrad, bIdx, cIdx, hIdx, wIdx)

							outputIdx++
						}
					}
				}
			}
		}
	}

	return output, nil
}

// AvgPool2D performs a 2D average pooling over t.
func (t *Tensor) AvgPool2D(kernelSize, stride int) (*Tensor, error) {
	// Input is assumed to be [batch_size, channels, height, width].
	inputShape := t.Data.Shape()
	if len(inputShape) != 4 {
		return nil, fmt.Errorf("avgpool2d: expected 4D input, got %dD", len(inputShape))
	}

	batchSize, inputChannels, inputHeight, inputWidth := inputShape[0], inputShape[1], inputShape[2], inputShape[3]

	// Compute the output dimensions (no padding).
	outputHeight := (inputHeight-kernelSize)/stride + 1
	outputWidth := (inputWidth-kernelSize)/stride + 1

	// Allocate the output buffer.
	outputSize := batchSize * inputChannels * outputHeight * outputWidth
	outputData := make([]float64, outputSize)
	outputShape := []int{batchSize, inputChannels, outputHeight, outputWidth}

	// Perform the average pooling.
	outputIdx := 0
	for b := 0; b < batchSize; b++ {
		for c := 0; c < inputChannels; c++ {
			for oh := 0; oh < outputHeight; oh++ {
				for ow := 0; ow < outputWidth; ow++ {
					startH := oh * stride
					startW := ow * stride

					sum := 0.0
					count := 0

					// Average over the pooling window.
					for kh := 0; kh < kernelSize; kh++ {
						for kw := 0; kw < kernelSize; kw++ {
							ih := startH + kh
							iw := startW + kw
							if ih < inputHeight && iw < inputWidth {
								inputVal, _ := t.Data.Get(b, c, ih, iw)
								sum += inputVal
								count++
							}
						}
					}

					outputData[outputIdx] = sum / float64(count)
					outputIdx++
				}
			}
		}
	}

	outputMatrix, err := gomatrix.NewMatrix(outputData, outputShape)
	if err != nil {
		return nil, err
	}

	output := &Tensor{
		Data: outputMatrix,
		Op:   "avgpool2d",
	}
	output.Prevs[0] = t
	output.Num_Prevs = 1
	output.Args[0] = kernelSize
	output.Args[1] = stride

	output.backwardFunc = func() {
		if t.Grad != nil {
			// Backward pass: distribute each output gradient evenly over its
			// pooling window. With the output-size formula above, windows never
			// cross the input boundary, so every window holds exactly
			// kernelSize*kernelSize elements.
			for b := 0; b < batchSize; b++ {
				for c := 0; c < inputChannels; c++ {
					for oh := 0; oh < outputHeight; oh++ {
						for ow := 0; ow < outputWidth; ow++ {
							startH := oh * stride
							startW := ow * stride

							outputGrad, _ := output.Grad.Get(b, c, oh, ow)
							avgGrad := outputGrad / float64(kernelSize*kernelSize)

							// Accumulate the share into each input position in the window.
							for kh := 0; kh < kernelSize; kh++ {
								for kw := 0; kw < kernelSize; kw++ {
									ih := startH + kh
									iw := startW + kw
									if ih < inputHeight && iw < inputWidth {
										currentGrad, _ := t.Grad.Get(b, c, ih, iw)
										newGrad := currentGrad + avgGrad
										t.Grad.Set(newGrad, b, c, ih, iw)
									}
								}
							}
						}
					}
				}
			}
		}
	}

	return output, nil
}
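
// conv2DBackward is a minimal sketch of the backward pass that Conv2D's
// backwardFunc leaves unimplemented. It is NOT wired into Conv2D above: the
// name conv2DBackward is ours, and it assumes that t.Grad, kernel.Grad and
// output.Grad have already been allocated with the same shapes as the
// corresponding Data matrices (this file never shows how Grad is allocated).
// It walks the same index space as the forward loop and accumulates
//
//	dInput[b,c,ih,iw]  += dOutput[b,f,oh,ow] * kernel[c,f,kh,kw]
//	dKernel[c,f,kh,kw] += dOutput[b,f,oh,ow] * input[b,c,ih,iw]
func conv2DBackward(t, kernel, output *Tensor, stride, padding int) {
	inputShape := t.Data.Shape()
	kernelShape := kernel.Data.Shape()
	outputShape := output.Data.Shape()

	batchSize, inputChannels, inputHeight, inputWidth := inputShape[0], inputShape[1], inputShape[2], inputShape[3]
	numFilters, kernelHeight, kernelWidth := kernelShape[1], kernelShape[2], kernelShape[3]
	outputHeight, outputWidth := outputShape[2], outputShape[3]

	for b := 0; b < batchSize; b++ { // batch
		for f := 0; f < numFilters; f++ { // filter
			for oh := 0; oh < outputHeight; oh++ { // output height
				for ow := 0; ow < outputWidth; ow++ { // output width
					grad, _ := output.Grad.Get(b, f, oh, ow)
					for c := 0; c < inputChannels; c++ { // channel
						for kh := 0; kh < kernelHeight; kh++ { // kernel height
							for kw := 0; kw < kernelWidth; kw++ { // kernel width
								ih := oh*stride - padding + kh
								iw := ow*stride - padding + kw
								if ih < 0 || ih >= inputHeight || iw < 0 || iw >= inputWidth {
									continue // positions in the padding contribute nothing
								}
								if t.Grad != nil {
									// Input gradient: kernel weight scaled by the upstream gradient.
									kernelVal, _ := kernel.Data.Get(c, f, kh, kw)
									cur, _ := t.Grad.Get(b, c, ih, iw)
									t.Grad.Set(cur+grad*kernelVal, b, c, ih, iw)
								}
								if kernel.Grad != nil {
									// Kernel gradient: input value scaled by the upstream gradient.
									inputVal, _ := t.Data.Get(b, c, ih, iw)
									cur, _ := kernel.Grad.Get(c, f, kh, kw)
									kernel.Grad.Set(cur+grad*inputVal, c, f, kh, kw)
								}
							}
						}
					}
				}
			}
		}
	}
}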