事始め

HPCの分野では、やはりC/C++で開発されることが多いよね。しかしね、C/C++のようなコンパイル型言語は、ビルドとかがマジで面倒くさいんですわ。だから、Pythonのようなインタープリタ型言語からC/C++のライブラリを呼び出せたらいいよね、ということで作られたのがpybind11などのバインディングライブラリなんだよね。

今回は、C/C++で書かれた科学技術データ向けの非可逆圧縮アルゴリズムの一つである cuSZp を、Python から呼び出す方法を記録します。

前例

けいち先生が、すでにpybind11を使ってMGARDをPythonから呼び出せるようにしてくれています。これを参考にしたいので、まずは先生が何をやったのかを解析するところから始めます。

pymgardのcmake

大事なところだけを抜粋して、解説しながら進めていきます。



# Declare the MGARD project. Only C++ is enabled at this point; CUDA is
# enabled further down when `MGARD_ENABLE_CUDA` is `ON`.
project(
  MGARD
  VERSION "${MGARD_VERSION_MAJOR}.${MGARD_VERSION_MINOR}.${MGARD_VERSION_PATCH}"
  HOMEPAGE_URL "https://github.com/CODARcode/MGARD"
  LANGUAGES CXX
)

# MgardXGenerateSource is not a stock CMake module (presumably shipped with
# the project - TODO confirm); PkgConfig provides the pkg-config helpers.
include(MgardXGenerateSource)
find_package(PkgConfig REQUIRED)

# Both MGARD-X command line tools follow the same recipe: one source file
# named after the tool, linked against mgard-library and the dynamic-loading
# libraries, with the source-tree and build-tree include directories, and an
# install rule.
foreach(mgard_x_tool IN ITEMS mgard-x mgard-x-autotuner)
  add_executable(
    ${mgard_x_tool}
    ${CMAKE_CURRENT_SOURCE_DIR}/src/mgard-x/Executables/${mgard_x_tool}.cpp
  )
  target_link_libraries(${mgard_x_tool} mgard-library ${CMAKE_DL_LIBS})
  target_include_directories(
    ${mgard_x_tool}
    PUBLIC
      $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
      $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include>
  )
  install(TARGETS ${mgard_x_tool})
endforeach()

# Declare the library target up front, without sources (they are attached
# later with target_sources), so that compile definitions can be set on it.
add_library(mgard-library)

# Optional CUDA support, controlled by MGARD_ENABLE_CUDA.
if(MGARD_ENABLE_CUDA)
  enable_language(CUDA)
  set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
  # Default to architecture 70 when none has been chosen.
  # NOTE(review): this check runs after enable_language(CUDA), which may
  # already have initialized CMAKE_CUDA_ARCHITECTURES - confirm that the
  # default below can actually take effect.
  if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
      set(CMAKE_CUDA_ARCHITECTURES 70)
  endif()

  find_package(nvcomp REQUIRED)
  find_package(CUDAToolkit REQUIRED)

  # PUBLIC: the MGARD_ENABLE_CUDA definition is also applied to targets that
  # link against mgard-library.
  target_compile_definitions(mgard-library PUBLIC MGARD_ENABLE_CUDA)
  # Appends to the directory-wide CUDA flags (affects every CUDA source
  # compiled below this point, not only mgard-library).
  set (CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr -w --generate-line-info")
  
  # Stores a property name/value pair in a variable; where it is consumed is
  # not shown in this excerpt.
  set (MGARD_X_SEPARATE_COMPILE_COMPILATION_OPTION 
       CUDA_SEPARABLE_COMPILATION ON)

  # Treat the files listed in MGARD_X_CUDA_SRC (defined outside this excerpt)
  # as CUDA sources.
  set_source_files_properties(${MGARD_X_CUDA_SRC} PROPERTIES LANGUAGE CUDA)
endif()

# External dependencies: zlib and pkg-config support.
find_package(ZLIB REQUIRED)

find_package(PkgConfig REQUIRED)

# Hand-written sources of the core library.
set(
  MGARD_LIBRARY_CPP
  src/compress.cpp
  src/compress_internal.cpp
  src/compressors.cpp
  src/format.cpp
)

# compress_internal_generated.cpp is produced at build time by a Python
# script and then appended to the library's source list.
set(MGARD_GENERATE_DIMENSION_SWITCH "${CMAKE_CURRENT_SOURCE_DIR}/scripts/generate_dimension_switch.py")
set(MGARD_COMPRESS_INTERNAL_CPP "${MGARD_FILE_FORMAT_SRCDIR}/compress_internal_generated.cpp")
add_custom_command(
  OUTPUT
  "${MGARD_COMPRESS_INTERNAL_CPP}"
  COMMAND
  "${PYTHON3}"
  "${MGARD_GENERATE_DIMENSION_SWITCH}"
  "--decompress"
  "${MAXIMUM_DIMENSION}"
  "${MGARD_COMPRESS_INTERNAL_CPP}"
  # Regenerate the file whenever the generator script itself changes;
  # without this the generated source can go stale.
  DEPENDS
  "${MGARD_GENERATE_DIMENSION_SWITCH}"
  # Pass the arguments to the script unmodified on every platform.
  VERBATIM
)
list(APPEND MGARD_LIBRARY_CPP "${MGARD_COMPRESS_INTERNAL_CPP}")


# Attach every source list to the library. The backend-specific lists are
# defined outside this excerpt; a list that is empty or unset contributes
# no files.
target_sources(
  mgard-library
  PRIVATE
    ${MGARD_LIBRARY_CPP}
    ${MGARD_CUDA_SRC}
    ${MGARD_X_SRC}
    ${MGARD_X_SERIAL_SRC}
    ${MGARD_X_OPENMP_SRC}
    ${MGARD_X_CUDA_SRC}
    ${MGARD_X_HIP_SRC}
    ${MGARD_X_SYCL_SRC}
)

# Resolve CUDA device symbols when linking the library, and name the built
# artifact `mgard` instead of `mgard-library`.
set_target_properties(
  mgard-library
  PROPERTIES
    CUDA_RESOLVE_DEVICE_SYMBOLS ON
    OUTPUT_NAME mgard
)

# protobuf is a PUBLIC dependency, so it propagates to consumers of the library.
target_link_libraries(mgard-library PUBLIC PkgConfig::protobuf)

# Install the library and register it in the `mgard-targets` export set.
install(TARGETS mgard-library EXPORT mgard-targets)

# Generated configuration headers from the build tree.
install(
  FILES "${PROJECT_BINARY_DIR}/include/MGARDConfig.hpp"
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/mgard"
)
install(
  FILES "${PROJECT_BINARY_DIR}/include/MGARDXConfig.h"
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/mgard/mgard-x/RuntimeX"
)

# Public headers from the source tree.
install(
  DIRECTORY "include/"
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/mgard"
)

# Header for the file format (path variables are defined outside this excerpt).
install(
  FILES "${MGARD_FILE_FORMAT_HPP}"
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/mgard/${MGARD_FILE_FORMAT_INCLUDESUBDIR_COMPONENT}"
)


# Optional Python bindings built with pybind11, controlled by MGARD_ENABLE_PYTHON.
if(MGARD_ENABLE_PYTHON)
  # The interpreter plus the development files needed for an extension module.
  find_package(
    Python
    REQUIRED
    COMPONENTS
      Interpreter
      Development.Module
  )

  # Fetch pybind11 at configure time and add it to the build.
  include(FetchContent)
  FetchContent_Declare(
    pybind11
    GIT_REPOSITORY https://github.com/pybind/pybind11.git
    GIT_TAG stable
  )
  FetchContent_MakeAvailable(pybind11)

  # Build the `_mgard` extension module from the binding source, link it
  # against the core library, and install it under python/mgard.
  pybind11_add_module(_mgard src/python/bindings.cpp)
  target_link_libraries(_mgard PRIVATE mgard-library)
  install(
    TARGETS _mgard
    LIBRARY DESTINATION python/mgard
  )
endif()

# Write mgard-targets.cmake into the build tree, exposing the library
# under the `mgard::` namespace.
export(
  TARGETS
    mgard-library
  NAMESPACE
    mgard::
  FILE
    "${PROJECT_BINARY_DIR}/mgard-targets.cmake"
)

bindings.cpp

今回のcuSZpで、確認してみよう

CMakeLists.txtの中身

# Minimum version of CMake required to build the project.
cmake_minimum_required(VERSION 3.21)

project(cuSZp
        VERSION 0.0.2
        DESCRIPTION "Error-bounded GPU lossy compression library"
        )
# Not read anywhere in this file; presumably consumed by the Installing
# module included later - TODO confirm.
set(namespace "cuSZp")
enable_language(CXX)
enable_language(CUDA)

# Provides the CUDA:: imported targets (e.g. CUDA::cudart) used below.
find_package(CUDAToolkit REQUIRED)

# Host code: C++17, mandatory.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Python bindings are opt-in: configure with -DCUSZP_ENABLE_PYTHON=ON.
option(CUSZP_ENABLE_PYTHON "Enable Python bindings." OFF)

# NOTE(review): this is set after enable_language(CUDA); it may come too late
# to influence the host compiler selection - confirm.
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
# Device code: CUDA C++17, mandatory. (The original repeated
# CMAKE_CXX_STANDARD_REQUIRED here, leaving the CUDA standard optional.)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
# Architectures to generate device code for.
set(CMAKE_CUDA_ARCHITECTURES 60 61 62 70 75)
# NOTE(review): looks like a variable of the legacy FindCUDA module, which is
# not used in this file - confirm it is still needed.
set(CUDA_PROPAGATE_HOST_FLAGS ON)
set(CUDA_LIBRARY CUDA::cudart)

# Default to a Release build for single-configuration generators when the
# user did not choose a build type.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY VALUE Release)
endif()

# Defines CMAKE_INSTALL_INCLUDEDIR. It has to be available before the
# INSTALL_INTERFACE expression below is written, because the variable is
# expanded when target_include_directories() is called; otherwise the
# installed target ends up with an empty include path on a fresh configure.
include(GNUInstallDirs)

# The compression library itself, built as a shared library.
add_library(${PROJECT_NAME} SHARED)

target_sources(${PROJECT_NAME}
        PRIVATE
        src/cuSZp_f32.cu
        src/cuSZp_f64.cu
        src/cuSZp_utility.cu
        src/cuSZp_timer.cu
        src/cuSZp_entry_f32.cu
        src/cuSZp_entry_f64.cu
        )

target_include_directories(${PROJECT_NAME}
        PRIVATE
        # where the library itself will look for its internal headers
        ${CMAKE_CURRENT_SOURCE_DIR}/src
        PUBLIC
        # where top-level project will look for the library's public headers
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
        # where external projects will look for the library's public headers
        $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
        )

# The CUDA runtime is an implementation detail of the library (PRIVATE).
target_link_libraries(${PROJECT_NAME} PRIVATE CUDA::cudart)

# List of public headers; not read in this file, presumably used by the
# Installing module included below - TODO confirm.
set(public_headers
        include/cuSZp_f32.h
        include/cuSZp_f64.h
        include/cuSZp_utility.h
        include/cuSZp_timer.h
        include/cuSZp_entry_f32.h
        include/cuSZp_entry_f64.h
        )

# Load the project's own Installing module from the cmake/ directory.
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
include(Installing)

# Optional Python bindings built with pybind11; enable with
# -DCUSZP_ENABLE_PYTHON=ON.
if(CUSZP_ENABLE_PYTHON)
  # Locate the Python interpreter and the development files needed to build
  # an extension module.
  find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)

  # Fetch pybind11 at configure time and add it to the build.
  include(FetchContent)
  FetchContent_Declare(
      pybind11
      GIT_REPOSITORY https://github.com/pybind/pybind11.git
      GIT_TAG        stable
  )
  FetchContent_MakeAvailable(pybind11)

  # Declare the extension module to build; this is pybind11's counterpart of
  # add_library().
  pybind11_add_module(_cuSZp src/python/bindings.cpp)
  # No hard-coded include paths: the cuSZp target exports its public include
  # directory, and CUDA::cudart carries the CUDA toolkit headers, so linking
  # against the two targets is enough on any installation layout.
  target_link_libraries(_cuSZp PRIVATE cuSZp CUDA::cudart)
  install(TARGETS _cuSZp LIBRARY DESTINATION python/cuSZp)
endif()

# The example programs in examples/ are built by default; configure with
# -DCUSZP_BUILD_EXAMPLES=OFF to leave them out.
option(CUSZP_BUILD_EXAMPLES "Option to enable building example programs" ON)
if(CUSZP_BUILD_EXAMPLES)
  add_subdirectory(examples)
endif()

bindings.cpp(src/python/bindings.cpp)の中身

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <cuSZp_utility.h>
#include <cuSZp_entry_f32.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>

namespace py = pybind11;


/**
 * Compress a float32 NumPy array with cuSZp and return the compressed bytes.
 *
 * @param original  Input data. It is handed to cuSZp as one flat buffer of
 *                  `original.size()` floats.
 * @param tol       Tolerance, forwarded as the last argument of
 *                  SZp_compress_hostptr_f32.
 * @return          One-dimensional uint8 NumPy array holding the
 *                  `compressed_size` bytes reported by cuSZp. The array keeps
 *                  the underlying buffer alive and frees it when collected.
 * @throws py::value_error if the input cannot be converted to a C-contiguous
 *                  float32 array.
 */
py::buffer compress(py::array_t<float> original, float tol) {
    // cuSZp reads the input through a raw pointer as a dense run of floats,
    // so a strided (non-contiguous) view must be packed first.
    auto contiguous =
        py::array_t<float, py::array::c_style | py::array::forcecast>::ensure(original);
    if (!contiguous) {
        throw py::value_error("compress: input must be convertible to a C-contiguous float32 array");
    }

    const size_t num_elements = static_cast<size_t>(contiguous.size());

    // Reserve as many BYTES as the uncompressed input occupies (the element
    // count alone is four times too small for float data).
    // NOTE(review): assumes the compressed stream never exceeds the size of
    // the input - confirm against the cuSZp documentation.
    unsigned char *compressed_data = new unsigned char[num_elements * sizeof(float)];

    // Hand ownership to a capsule right away so the buffer is released even
    // if something below throws. The buffer came from new[], so it must be
    // released with delete[] on the original pointer type.
    py::capsule owner(compressed_data, [](void *ptr) {
        delete[] static_cast<unsigned char *>(ptr);
    });

    size_t compressed_size = 0;

    // pointer_to_original_data(float*), pointer_to_compressed_data(unsigned char*),
    // size_of_original_data(size_t), pointer_to_compressed_size, tolerance
    SZp_compress_hostptr_f32(const_cast<float *>(contiguous.data()), compressed_data,
                             num_elements, &compressed_size, tol);

    return py::array_t<unsigned char>({compressed_size},
                                      {1}, // stride of the array, in bytes
                                      compressed_data,
                                      owner);
}

ちょっと、