C++ 中文周刊 2024-06-17 第160期

2024-07-30 15:22:05 浏览数 (2)

资讯

标准委员会动态/ide/编译器信息放在这里

编译器信息最新动态推荐关注hellogcc公众号 本周更新 2024-06-12 第258期

文章

Why do Windows functions all begin with a pointless MOV EDI, EDI instruction?
https://devblogs.microsoft.com/oldnewthing/20110921-00/?p=9583

这是2011年的文章了,最近又有讨论。简单来说它就是一条两字节的nop:Windows可以在线热补丁(hot patch),把这条指令替换成jmp xx。不过这玩意也就Windows有

其实XCHG edi, edi也是一个意思

Making a bignum library for fun
https://austinhenley.com/blog/bignum1.html

实现一个大数乘法,直接看代码吧 https://github.com/AZHenley/bignum/blob/main/bignum.c

这里当字符串处理的,难度降低了很多 面试题属于是

代码语言:javascript复制
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Arbitrary-precision integer stored as decimal digits.
 *
 * digits: buffer with one decimal digit per element, kept as a raw value
 *         0-9 (not ASCII) and ordered least-significant digit first.
 * size:   number of valid elements in digits.
 *
 * No sign is stored.
 */
typedef struct BigNum {
    char *digits;
    int size;
} BigNum;

/*
 * Initializes *n from a decimal string.
 *
 * str is expected to contain only the characters '0'-'9'; it is not
 * validated. The digits are stored in reverse (least-significant first)
 * as raw values 0-9. The caller releases the buffer with bignum_free().
 *
 * If the allocation fails (or str is empty and malloc(0) returns NULL),
 * *n is left as an empty number with size 0.
 */
void bignum_init(BigNum *n, const char *str) {
    n->size = (int)strlen(str);
    /* The cast keeps the snippet valid as C++ as well as C. */
    n->digits = (char *)malloc(n->size * sizeof(char));
    if (n->digits == NULL) {
        n->size = 0;
        return;
    }
    for (int i = 0; i < n->size; i++) {
        /* Store digits in reverse. Convert from ASCII. */
        n->digits[i] = str[n->size - 1 - i] - '0';
    }
}

/*
 * Releases the digit buffer of *n and resets it to an empty number
 * (digits == NULL, size == 0), so a second call is harmless.
 */
void bignum_free(BigNum *n) {
    char *owned = n->digits;

    n->size = 0;
    n->digits = NULL;
    free(owned);
}

/*
 * Prints *n to stdout as "BigNum: <digits>" followed by a newline.
 *
 * Digits are stored least-significant first, so the buffer is walked
 * from the last element down to index 0.
 */
void bignum_print(BigNum *n) {
    printf("BigNum: ");
    for (int i = n->size - 1; i >= 0; i--) {
        printf("%d", n->digits[i]);
    }
    printf("\n");
}

/*
 * Three-way comparison of two numbers.
 *
 * Returns 1 when a > b, -1 when a < b and 0 when they are equal.
 * A number with more digits is treated as larger, so both operands are
 * assumed to carry no leading zeros.
 */
int bignum_compare(const BigNum *a, const BigNum *b) {
    if (a->size > b->size) {
        return 1;
    }
    if (a->size < b->size) {
        return -1;
    }

    /* Same length: the most significant differing digit decides. */
    int pos = a->size;
    while (pos-- > 0) {
        const char lhs = a->digits[pos];
        const char rhs = b->digits[pos];
        if (lhs > rhs) {
            return 1;
        }
        if (lhs < rhs) {
            return -1;
        }
    }
    return 0;
}

/*
 * Computes *result = *a + *b using schoolbook addition.
 *
 * result is overwritten without freeing any buffer it held before;
 * release the returned buffer with bignum_free(). If the allocation
 * fails, result is left empty (digits == NULL, size == 0).
 */
void bignum_add(BigNum *result, const BigNum *a, const BigNum *b) {
    int max_size = a->size > b->size ? a->size : b->size;
    /* One extra slot for a possible final carry.
       The cast keeps the snippet valid as C++ as well as C. */
    result->digits = (char *)malloc((max_size + 1) * sizeof(char));
    result->size = 0;
    if (result->digits == NULL) {
        return;
    }

    int carry = 0;
    int i;

    for (i = 0; i < max_size || carry; i++) {
        int sum = carry + (i < a->size ? a->digits[i] : 0) + (i < b->size ? b->digits[i] : 0);
        result->digits[i] = sum % 10; /* Store the last digit of the sum. */
        carry = sum / 10;             /* Carry any overflow. */
    }
    result->size = i;
}

/*
 * Computes *result = *a * *b using schoolbook long multiplication.
 *
 * result is overwritten without freeing any buffer it held before;
 * release the returned buffer with bignum_free(). If the allocation
 * fails, result is left empty (digits == NULL, size == 0).
 */
void bignum_multiply(BigNum *result, const BigNum *a, const BigNum *b) {
    /* A product never needs more than a->size + b->size digits. */
    result->size = a->size + b->size;
    /* The cast keeps the snippet valid as C++ as well as C. */
    result->digits = (char *)calloc(result->size, sizeof(char));
    if (result->digits == NULL) {
        result->size = 0;
        return;
    }

    for (int i = 0; i < a->size; i++) {
        for (int j = 0; j < b->size; j++) {
            int index = i + j;
            /* A slot holds at most 9 + 9 + 81 = 99 before it is reduced,
               so the running value always fits in a char. */
            result->digits[index] += a->digits[i] * b->digits[j];
            result->digits[index + 1] += result->digits[index] / 10;
            result->digits[index] %= 10;
        }
    }

    /* Trim any leading zeros. */
    while (result->size > 1 && result->digits[result->size - 1] == 0) {
        result->size--;
    }
}

/*
 * Demo driver: builds two 20-digit numbers, prints them, prints their sum
 * and product, then reports how they compare. Every BigNum is released
 * before returning.
 */
int main() {
    BigNum a, b, sum, product;

    bignum_init(&a, "12345678901234567890");
    bignum_init(&b, "98765432109876543210");

    bignum_print(&a);
    bignum_print(&b);

    bignum_add(&sum, &a, &b);
    bignum_print(&sum);
    bignum_free(&sum);

    bignum_multiply(&product, &a, &b);
    bignum_print(&product);
    bignum_free(&product);

    int cmp_result = bignum_compare(&a, &b);
    if (cmp_result > 0) {
        printf("a is greater than b\n");
    } else if (cmp_result < 0) {
        printf("a is less than b\n");
    } else {
        printf("a is equal to b\n");
    }

    bignum_free(&a);
    bignum_free(&b);

    return 0;
}
Using namespaces effectively
https://biowpn.github.io/bioweapon/2024/06/05/using-namespaces-effectively.html

不要在头文件里直接使用using namespace,应当放在cpp文件中或者函数里

另外 namespace 版本控制,两种玩法

代码语言:javascript复制
// Namespace versioning, variant 1: every API revision lives in its own
// nested namespace and a using-directive selects the default one.
namespace gem {

namespace v1 {
struct Point {
    int x;
    int y;
};
}  // namespace v1

namespace v2 {
struct Point {
    int y;  // y goes first in v2
    int x;
};
}  // namespace v2

// Make the v1 names reachable as gem::Point etc.
using namespace v1;

}  // namespace gem
代码语言:javascript复制
代码语言:javascript复制
// Namespace versioning, variant 2: the default version is declared as an
// inline namespace, so its members are usable as if they were declared
// directly in gem; no using-directive is needed. The "..." bodies are
// placeholders for the versioned declarations.
namespace gem {
    inline namespace v1 {
        ...
    }
    namespace v2 {
        ...
    }
}

推荐用inline,少写一行

其实是C++11之前有bug,但是我觉得还是不知道这个bug比较好,就当少写一行

What’s the deal with std::type_identity?

https://devblogs.microsoft.com/oldnewthing/20240607-00/?p=109865

代码语言:javascript复制
// Identity metafunction: type_identity<T>::type is T itself.
template <class T>
struct type_identity {
    typedef T type;
};

感觉很垃圾,这是干嘛的?

举个例子

代码语言:javascript复制
// Returns a + b.
// Both parameters take part in deducing T, so the two arguments must have
// exactly the same type or deduction fails.
template<typename T>
T add(T a, T b) {
    return a + b;
}

auto sum = add(0.5, 1); // error: cannot deduce T (0.5 gives double, 1 gives int)

报错了,这怎么办?原因是T从0.5推导出double、从1推导出int,两者冲突。可以用type_identity把第二个参数变成非推导上下文,让T只由第一个参数决定

代码语言:javascript复制
// Returns a + b.
// Wrapping the second parameter in std::type_identity_t (C++20) makes it a
// non-deduced context: T is deduced from the first argument only, and the
// second argument is then converted to T.
template<typename T>
T add(T a, std::type_identity_t<T> b) {
    return a + b;
}
auto sum = add(0.5, 1); // T is "double"

来个现实例子

代码语言:javascript复制
// Base overload: accepts a parameterless work item. Only the declaration is
// shown here; presumably the implementation schedules `work` to run later.
void enqueue(const std::function<void()>& work);

// Convenience overload: binds `args` to `work` and forwards the resulting
// parameterless callable to the base enqueue overload.
// Args is deduced from BOTH parameters, so the first argument must already
// be a std::function for deduction to succeed.
template <typename... Args>
void enqueue(const std::function<void(Args...)>& work, Args... args)
{
    // Copy the callable and its arguments into the closure.
    auto bound = [=] { work(args...); };
    enqueue(bound);
}
enqueue([](int v) { std::cout << v; }, 42); // does not compile: Args cannot be deduced from a lambda, which is not a std::function

这俩enqueue明显有推导关系,为啥编译不过呢?因为模板参数推导不考虑隐式转换:lambda不是std::function,编译器没法从第一个参数推导出Args,模板版本推导失败;而非模板版本只接受一个参数,也不匹配

需要一个限制

代码语言:javascript复制
template<typename...Args>
void enqueue(
    std::type_identity_t<
        std::function<void(Args...)>
    > const& work,
    Args...args)
{
    enqueue([=] { work(args...); });
}

当然改成这样也行

代码语言:javascript复制
template<typename...Args>
void enqueue(
    std::function<void(
        std::type_identity_t<Args>...
    )> const& work,
    Args...args)
{
    enqueue([=] { work(args...); });
}

当然还可以这样改

代码语言:javascript复制
// Convenience overload: binds `args` to `work` and forwards the resulting
// parameterless callable to the base enqueue overload.
// std::decay_t<Args> is a non-deduced context, so Args is deduced from the
// forwarding references alone; decaying also matches the by-value copies
// stored in the closure.
template<typename...Args>
void enqueue(
    std::function<void(
        std::decay_t<Args>...
    )> const& work,
    Args&&...args)
{
    // The lambda must be `mutable`: otherwise the captured copies are const
    // and std::move(args) silently degrades to a copy. The stored arguments
    // are moved out on the first call, so the work item is single-shot.
    enqueue([work, ...args = std::forward<Args>(args)]() mutable
            { work(std::move(args)...); });
}

当然也可以这样改,chatgpt老师教的

代码语言:javascript复制
// Convenience overload: binds one or more arguments to any callable and
// forwards the resulting parameterless callable to the base enqueue
// overload.
//
// At least one bound argument is required. If this overload accepted zero
// arguments, the inner enqueue(closure) call would prefer this template
// over the std::function overload and recurse forever.
//
// The callable and its arguments are copied into the closure. They are
// invoked as lvalues, so running the work item more than once is safe.
// (std::forward on the captured copies would not compile: captures of a
// non-mutable lambda are const.)
template<typename Func, typename First, typename... Rest>
void enqueue(Func&& func, First&& first, Rest&&... rest)
{
    enqueue([=]() mutable { std::invoke(func, first, rest...); });
}

想了解更多可以看提案哈 https://open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0887r1.pdf

The limits of [[maybe_unused]] https://www.sandordargo.com/blog/2024/06/05/the-limits-of-maybe-unused

不是特别好用,用(void)强转来忽略也能对付着用

Destructuring Lambda Expression Parameters https://vector-of-bool.github.io/2024/06/13/lambda-destructure.html

结构化绑定很好用

代码语言:javascript复制
// Returns the sum of the three elements, unpacked with a structured binding.
// std::tuple is spelled out so the snippet does not rely on a surrounding
// using-directive.
int sum(std::tuple<int, int, int> triple) {
  auto [a, b, c] = triple;
  return a + b + c;
}

结构化绑定的值如果能直接传给lambda是不是很帅

比如这样

代码语言:javascript复制
// Predicate defined elsewhere; only its declaration is visible here.
extern bool is_good(string s, int v);

// Wishful syntax: filter the map entries with a lambda that destructures
// each key/value pair directly in its parameter list.
auto foo(map<string, int> items) {
  return views::filter(
    items,
    // Does not compile: a structured binding cannot be declared in a
    // parameter list.
    [](auto [key, value]) {
      return is_good(key, value);
    });
}

写一个转发吧,其实就是

代码语言:javascript复制
// Adapter object: `spread_args % fn` yields a callable that takes one
// tuple-like argument and invokes `fn` with the tuple's elements as
// separate arguments (via std::apply).
inline constexpr struct {
  template <typename F>
  constexpr auto operator%(F&& fn) const {
    return [fn](auto&& tpl) {
      // Perfect-forward the tuple. The cast is what a FWD(x) macro would
      // normally expand to; writing it out keeps the snippet self-contained.
      return std::apply(fn, static_cast<decltype(tpl)&&>(tpl));
    };
  }
} spread_args{};

上面的代码就可以这样了

代码语言:javascript复制
// Returns a lazy view of the entries of `items` for which is_good(key,
// value) holds.
// `items` is a by-value parameter, so it is moved into the view; the view
// then owns the map. Passing it as an lvalue would return a view that
// refers to a destroyed local.
auto foo(map<string, int> items) {
  return views::filter(
    std::move(items),
    // Works! spread_args unpacks each pair into (key, value).
    spread_args % [](auto key, auto value) {
      return is_good(key, value);
    });
}

甚至可以更简单点

代码语言:javascript复制
// Returns a lazy view of the entries of `items` accepted by is_good.
// spread_args unpacks each key/value pair into is_good's two parameters.
// `items` is a by-value parameter, so it is moved into the view; the view
// then owns the map instead of referring to a destroyed local.
auto foo(map<string, int> items) {
  return views::filter(std::move(items), spread_args % is_good);
}
Scan HTML faster with SIMD instructions: Chrome edition https://lemire.me/blog/2024/06/08/scan-html-faster-with-simd-instructions-chrome-edition/

常规

代码语言:javascript复制
// Advances `start` to the first byte in [start, end) that is '<', '&',
// '\r' or '\0'. If there is no such byte, `start` ends up equal to `end`.
void NaiveAdvanceString(const char *&start, const char *end) {
  for (; start < end; start++) {
    if (*start == '<' || *start == '&'
        || *start == '\r' || *start == '\0') {
      return;
    }
  }
}

常规sse

代码语言:javascript复制
// SSE2 version: advances `start` to the first byte in [start, end) that is
// '<', '&', '\r' or '\0'. If there is no such byte, `start` ends up equal
// to `end`.
//
// Full 16-byte blocks are compared against the four target bytes at once;
// the remaining tail (fewer than 16 bytes) is scanned one byte at a time.
void AdvanceString(const char*& start, const char* end) {
    const __m128i quote_mask = _mm_set1_epi8('<');
    const __m128i escape_mask = _mm_set1_epi8('&');
    const __m128i newline_mask = _mm_set1_epi8('\r');
    const __m128i zero_mask = _mm_set1_epi8('\0');

    static constexpr auto stride = 16;
    // Compare the remaining length instead of forming `start + 15`, which
    // could point past the end of the buffer.
    for (; end - start >= stride; start += stride) {
        __m128i data = _mm_loadu_si128(
           reinterpret_cast<const __m128i*>(start));
        __m128i quotes = _mm_cmpeq_epi8(data, quote_mask);
        __m128i escapes = _mm_cmpeq_epi8(data, escape_mask);
        __m128i newlines = _mm_cmpeq_epi8(data, newline_mask);
        __m128i zeros = _mm_cmpeq_epi8(data, zero_mask);
        __m128i mask = _mm_or_si128(_mm_or_si128(quotes, zeros),
             _mm_or_si128(escapes, newlines));
        // One bit per byte lane; the lowest set bit is the first match.
        int m = _mm_movemask_epi8(mask);
        if (m != 0) {
            start += __builtin_ctz(m);
            return;
        }
    }

    // Process any remaining bytes (less than 16)
    while (start < end) {
        if (*start == '<' || *start == '&'
             || *start == '\r' || *start == '\0') {
            return;
        }
        start++;
    }
}

查表 NEON(下面用的是 ARM NEON 指令,不是 SSE)

代码语言:javascript复制
// NEON (AArch64) version using two nibble lookup tables: advances `start`
// to the first byte in [start, end) that is '<' (0x3c), '&' (0x26),
// '\r' (0x0d) or '\0' (0x00). If there is no such byte, `start` ends up
// equal to `end`.
//
// Each target byte owns one bit. A byte matches when the table entry for
// its low nibble and the entry for its high nibble share a set bit.
void AdvanceStringTable(const char *&start, const char *end) {
  // Indexed by low nibble: 0x0 -> '\0', 0x6 -> '&', 0xc -> '<', 0xd -> '\r'.
  uint8x16_t low_nibble_mask = {0b0001, 0, 0, 0, 0, 0, 0b0100,
          0, 0, 0, 0, 0, 0b0010, 0b1000, 0, 0};
  // Indexed by high nibble: 0x0 -> '\0' and '\r', 0x2 -> '&', 0x3 -> '<'.
  uint8x16_t high_nibble_mask = {0b1001, 0, 0b0100, 0b0010,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  uint8x16_t v0f = vmovq_n_u8(0xf);
  // Lane k holds 16 - k, so the maximum over the matching lanes identifies
  // the earliest match.
  uint8x16_t bit_mask = {16, 15, 14, 13, 12, 11, 10, 9, 8,
                            7, 6, 5, 4, 3, 2, 1};
  static constexpr auto stride = 16;
  // Compare the remaining length instead of forming `start + 15`, which
  // could point past the end of the buffer.
  for (; end - start >= stride; start += stride) {
    uint8x16_t data = vld1q_u8(reinterpret_cast<const uint8_t *>(start));
    uint8x16_t lowpart = vqtbl1q_u8(low_nibble_mask, vandq_u8(data, v0f));
    uint8x16_t highpart = vqtbl1q_u8(high_nibble_mask,
           vshrq_n_u8(data, 4));
    uint8x16_t classify = vandq_u8(lowpart, highpart);
    uint8x16_t matchesones = vtstq_u8(classify, vdupq_n_u8(0xFF));
    uint8x16_t matches = vandq_u8(bit_mask, matchesones);
    int m = vmaxvq_u8(matches);
    if(m != 0) {
      start += 16 - m;
      return;
    }
  }
  // Scalar scan of the remaining bytes (fewer than 16).
  for (;start < end; start++) {
    if(*start == '<' || *start == '&' || *start == '\r'
     || *start == '\0') {
      return;
    }
  }
}

一次查表

代码语言:javascript复制
// NEON (AArch64) version using a single lookup table: advances `start` to
// the first byte in [start, end) that is '<' (0x3c), '&' (0x26),
// '\r' (0x0d) or '\0' (0x00). If there is no such byte, `start` ends up
// equal to `end`.
//
// The four targets have distinct low nibbles, so the table maps each low
// nibble to the one target byte carrying it. A byte matches when the table
// entry for its low nibble equals the byte itself.
void AdvanceStringTableSimpler(const char *&start, const char *end) {
  // Index 0x0 -> 0x00, 0x6 -> '&', 0xc -> '<', 0xd -> '\r'.
  uint8x16_t low_nibble_mask = {0, 0, 0, 0, 0, 0, 0x26, 0, 0,
                            0, 0, 0, 0x3c, 0xd, 0, 0};
  uint8x16_t v0f = vmovq_n_u8(0xf);
  // Lane k holds 16 - k, so the maximum over the matching lanes identifies
  // the earliest match.
  uint8x16_t bit_mask = {16, 15, 14, 13, 12, 11, 10, 9, 8,
                            7, 6, 5, 4, 3, 2, 1};
  static constexpr auto stride = 16;
  // Compare the remaining length instead of forming `start + 15`, which
  // could point past the end of the buffer.
  for (; end - start >= stride; start += stride) {
    uint8x16_t data = vld1q_u8(reinterpret_cast<const uint8_t *>(start));
    uint8x16_t lowpart = vqtbl1q_u8(low_nibble_mask, vandq_u8(data, v0f));
    uint8x16_t matchesones = vceqq_u8(lowpart, data);
    uint8x16_t matches = vandq_u8(bit_mask, matchesones);
    int m = vmaxvq_u8(matches);
    if(m != 0) {
      start += 16 - m;
      return;
    }
  }
  // Scalar scan of the remaining bytes (fewer than 16).
  for (;start < end; start++) {
    if(*start == '<' || *start == '&'
     || *start == '\r' || *start == '\0') {
      return;
    }
  }
}

我都看不懂

0 人点赞