#include <cstddef>
#include <iostream>

#include <cuda_runtime.h>
#include <opencv2/opencv.hpp>
// CUDA kernel: converts an interleaved 8-bit image to a single-channel
// grayscale image using the weights 0.299 (R), 0.587 (G), 0.114 (B).
//
// Launch layout: a 2D grid of 2D blocks, one thread per pixel, where
//   x = blockIdx.x * blockDim.x + threadIdx.x
//   y = blockIdx.y * blockDim.y + threadIdx.y
// Threads that fall outside width x height do nothing, so the grid may
// overshoot the image. No shared memory is used.
//
// Parameters:
//   d_input  - device pointer to width * height * channels bytes, rows tightly
//              packed, channels interleaved; channel 0 is read as R, channel 1
//              as G and channel 2 as B.
//   d_output - device pointer to width * height bytes receiving the gray values.
//   width    - image width in pixels.
//   height   - image height in pixels.
//   channels - interleaved channels per pixel. With 3 or more, the first three
//              are combined; with 1 or 2, channel 0 is copied through as the
//              gray value; with fewer than 1, nothing is written.
__global__ void rgbToGrayKernel(unsigned char* d_input, unsigned char* d_output, int width, int height, int channels) {
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Grid-tail guard, plus protection against a nonsensical channel count.
    if (x >= width || y >= height || channels < 1) {
        return;
    }

    // Index in size_t: (y * width + x) * channels can exceed INT_MAX for
    // very large images and would wrap if computed in int.
    const size_t pixel = static_cast<size_t>(y) * static_cast<size_t>(width) + static_cast<size_t>(x);
    const size_t base = pixel * static_cast<size_t>(channels);

    // Fewer than three channels: reading base + 1 / base + 2 would run past
    // this pixel (and past the buffer for the last pixel), so pass channel 0
    // through unchanged instead.
    if (channels < 3) {
        d_output[pixel] = d_input[base];
        return;
    }

    const float r = d_input[base];
    const float g = d_input[base + 1];
    const float b = d_input[base + 2];
    const float luma = 0.299f * r + 0.587f * g + 0.114f * b;

    // Round to nearest instead of truncating: float rounding can put pure
    // white at 254.9999..., which truncation would turn into 254.
    d_output[pixel] = static_cast<unsigned char>(luma + 0.5f);
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char* what) {
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << "CUDA error (" << what << "): " << cudaGetErrorString(status) << std::endl;
    return false;
}

// Loads "input.jpg", converts it to grayscale on the GPU, displays the
// original and the result, and writes the result to "output.jpg".
// Returns 0 on success and -1 if the image cannot be loaded or any CUDA
// call fails.
int main() {
    // Load the image. With its default flags cv::imread yields 8-bit,
    // 3-channel pixels stored in B,G,R order.
    cv::Mat inputImage = cv::imread("input.jpg");
    if (inputImage.empty()) {
        std::cerr << "画像が読み込めませんでした!" << std::endl;
        return -1;
    }

    // rgbToGrayKernel reads channel 0 as R and channel 2 as B, so reorder the
    // pixels before upload; otherwise the red and blue weights are swapped.
    // The freshly allocated result is also stored contiguously, which the
    // flat cudaMemcpy below relies on.
    cv::Mat rgbImage;
    cv::cvtColor(inputImage, rgbImage, cv::COLOR_BGR2RGB);

    const int width = rgbImage.cols;
    const int height = rgbImage.rows;
    const int channels = rgbImage.channels();

    // Byte counts in size_t so width * height * channels cannot overflow int.
    const std::size_t pixelCount = static_cast<std::size_t>(width) * static_cast<std::size_t>(height);
    const std::size_t inputBytes = pixelCount * static_cast<std::size_t>(channels) * sizeof(unsigned char);
    const std::size_t outputBytes = pixelCount * sizeof(unsigned char);

    // Host-side output image (single channel, 8-bit).
    cv::Mat outputImage(height, width, CV_8UC1);

    // Device buffers. Initialised to nullptr so the cudaFree calls below are
    // safe no-ops if an allocation never happened.
    unsigned char* d_input = nullptr;
    unsigned char* d_output = nullptr;

    // Allocate and upload; the && chain stops at the first failing step.
    bool ok = cudaSucceeded(cudaMalloc((void**)&d_input, inputBytes), "cudaMalloc d_input")
           && cudaSucceeded(cudaMalloc((void**)&d_output, outputBytes), "cudaMalloc d_output")
           && cudaSucceeded(cudaMemcpy(d_input, rgbImage.data, inputBytes, cudaMemcpyHostToDevice),
                            "cudaMemcpy host-to-device");

    if (ok) {
        // Launch configuration: 16x16 threads per block, ceil-divided grid so
        // every pixel is covered.
        const dim3 blockSize(16, 16);
        const dim3 gridSize((width + blockSize.x - 1) / blockSize.x,
                            (height + blockSize.y - 1) / blockSize.y);

        rgbToGrayKernel<<<gridSize, blockSize>>>(d_input, d_output, width, height, channels);

        // cudaGetLastError catches launch-configuration errors; the blocking
        // device-to-host copy then waits for the kernel and surfaces any
        // error raised while it ran.
        ok = cudaSucceeded(cudaGetLastError(), "rgbToGrayKernel launch")
          && cudaSucceeded(cudaMemcpy(outputImage.data, d_output, outputBytes, cudaMemcpyDeviceToHost),
                           "cudaMemcpy device-to-host");
    }

    // Release device memory on both the success and failure paths.
    cudaFree(d_input);
    cudaFree(d_output);

    if (!ok) {
        return -1;
    }

    // Display and save the result.
    cv::imshow("Original Image", inputImage);
    cv::imshow("Grayscale Image", outputImage);
    if (!cv::imwrite("output.jpg", outputImage)) {
        std::cerr << "Warning: failed to write output.jpg" << std::endl;
    }
    cv::waitKey(0);

    return 0;
}