MKL sparse QR solver for least square

COO to CSR format
代码语言：javascript复制
#include <vector>
#include <iostream>
#include <mkl.h>
#ifdef __linux__
#include <stdlib.h> //for aligned alloc!
#include <cstring> //believe it or not for memcpy!!
#endif
#include "mkl_sparse_qr.h"

// ---------------------
template<class T> T *std_vec_2_aligned_array(const std::vector<T> &vec, const int mem_alignment)
{
    size_t num_bytes = vec.size() * sizeof(T);
    #ifdef __linux__
    T *arr = (T *)aligned_alloc(mem_alignment, num_bytes);
    #else
    T *arr = (T *)_aligned_malloc(num_bytes, mem_alignment);
    #endif
    if (arr == nullptr)
    {
        throw("could not alloc");
    }
    memcpy((void *)(arr), (void *)vec.data(), num_bytes);
    return arr;
}

// ---------------------
sparse_matrix_t create_coo_mtx(int *rows_aligned, int *cols_aligned, float *vals_aligned, int num_rows, int num_cols, const size_t num_non_zeros)
{
    sparse_matrix_t mtx_coo; //the sparse matrix in coo format;
    auto res_creating_coo_mtx = mkl_sparse_s_create_coo(&mtx_coo,
                                                SPARSE_INDEX_BASE_ZERO, // indexing: C-style or Fortran-style
                                                num_rows,
                                                num_cols,
                                                num_non_zeros,
                                                rows_aligned,
                                                cols_aligned,
                                                vals_aligned);
    if (res_creating_coo_mtx != SPARSE_STATUS_SUCCESS)
    {
        throw("Failed creating coo mtx");
    }                                                
    return  mtx_coo;                                             
}
// ---------------------
sparse_matrix_t solve_lsqr( const std::vector<int> &i_idx, const std::vector<int> &j_idx, const std::vector<float> &vals, 
                            const std::vector<float> &b, int num_rows, int num_cols, const int mem_align)
{
    int *rows_aligned = std_vec_2_aligned_array<int>(i_idx, mem_align);
    int *cols_aligned = std_vec_2_aligned_array<int>(j_idx, mem_align);
    float *vals_aligned = std_vec_2_aligned_array<float>(vals, mem_align);
    float *b_aligned = std_vec_2_aligned_array<float>(b, mem_align);    

    size_t num_non_zeros = vals.size();

    sparse_matrix_t coo_mtx = create_coo_mtx(rows_aligned, cols_aligned, vals_aligned, num_rows, num_cols, num_non_zeros);
    sparse_matrix_t csr_mtx;
    sparse_status_t conversion_status = mkl_sparse_convert_csr(coo_mtx, SPARSE_OPERATION_NON_TRANSPOSE, &csr_mtx);

    if (conversion_status != SPARSE_STATUS_SUCCESS) 
    {
        throw("Could not convert creating coo mtx");
    }

    size_t num_bytes_x = num_cols * sizeof(float);
    #ifdef __linux__
    float *x =  (float *)aligned_alloc(mem_align, num_bytes_x);
    #else
    float *x = (float *)_aligned_malloc(num_bytes_x, mem_align);
    #endif
    if (x == nullptr)
    {
        throw("could not alloc for x");
    }

    matrix_descr descr;
    descr.type = SPARSE_MATRIX_TYPE_GENERAL;

    mkl_sparse_s_qr( SPARSE_OPERATION_NON_TRANSPOSE, csr_mtx, descr, SPARSE_LAYOUT_COLUMN_MAJOR, 1, x, num_cols, b_aligned, num_rows);

    for (int i = 0; i < num_cols; i  )
    {
        std::cout << "x[" << i << "] = " << x[i] << std::endl;
    }

    // delete matrices
    mkl_sparse_destroy(coo_mtx); 
    mkl_sparse_destroy(csr_mtx); 


    #ifdef __linux__
    free(rows_aligned);
    free(cols_aligned);
    free(vals_aligned);
    free(b_aligned);
    free(x);
    #else
    _aligned_free(rows_aligned);
    _aligned_free(cols_aligned);
    _aligned_free(vals_aligned);
    _aligned_free(b_aligned);
    _aligned_free(x);    
    #endif

    return csr_mtx;
}

// ---------------------
int main()
{
    
    const int m = 20;
    const int n = 10;
    std::vector<int> i = {14, 15, 2, 8, 14, 19, 4, 13, 2, 6, 9, 0, 4, 13, 14, 5, 8, 15, 18};
    std::vector<int> j = {0, 0, 1, 1, 1, 1, 3, 3, 5, 5, 5, 6, 6, 7, 7, 8, 8, 8, 9};
    std::vector<float> val = {  0.257613736712438,     
                                0.0641870873918986, 
                                0.180737760254794, 
                                0.163512368527526, 
                                0.751946393867450,
                                0.715212514785840,
                                0.0205357746581846,
                                0.577394196706649,
                                0.255386740488051,
                                0.932613572048564,
                                0.794657885388753,
                                0.415093386613047,
                                0.923675612620407,
                                0.440035595760254,
                                0.228669482105501,
                                0.653699889008253,
                                0.921097255892198,
                                0.767329510776574,
                                0.671202185356536};
    
    std::vector<float> b = {    0.328215158820962,
                                0,
                                0.230599734662620,
                                0,
                                0.746706152691139,
                                0.00210891073181778,
                                1.10669432877135,
                                0,
                                -0.0625800748928424,
                                0.942987965681641,
                                0,
                                0,
                                0,
                                0.586425727832437,
                                -0.105634126427948,
                                0.0348727716965916,
                                0,
                                0,
                                0.245403055235735,
                                -0.286726648462724};
    const int mem_alignment = 4096;
    sparse_matrix_t A_csr = solve_lsqr(i, j, val, b, m, n, mem_alignment);

    return 0;
}
format
0 人点赞