C++ 中文周刊 第112期

2023-05-09 10:24:05 浏览数 (1)

C++ 中文周刊 第112期

弄了个qq频道,手机qq点击进入

RSS https://github.com/wanghenshui/cppweeklynews/releases.atom

欢迎投稿,推荐或自荐文章/软件/资源等

请提交 issue

不想上班,本周内容极少。有点不想发了

资讯

标准委员会动态/ide/编译器信息放在这里

编译器信息最新动态推荐关注hellogcc公众号 本周更新 200期

200期,不容易

文章

  • Did you know that C++23 extended floating-point types?
代码语言:javascript复制
#include <stdfloat>

// Declares one variable of each extended floating-point alias from
// <stdfloat> (C++23), each initialised from a literal that carries the
// suffix matching its type. The program does nothing else.
int main() {
    std::float16_t   f16 = .42f16;    // f16 suffix
    std::bfloat16_t bf16 = .42bf16;   // bf16 suffix
    std::float32_t   f32 = .42f32;    // f32 suffix
    std::float64_t   f64 = .42f64;    // f64 suffix
    std::float128_t f128 = .42f128;   // f128 suffix
}

更多的浮点类型

  • Beautiful Branchless Binary Search 直接贴代码吧
代码语言:javascript复制
#include <stdint.h>
#include <bit>
#include <functional>

// Returns 1 shifted left to the position of the highest set bit of `i`,
// i.e. the largest power of two that is <= i.
// Precondition: i != 0. For i == 0, std::countl_zero returns the full bit
// width, so the shift count becomes -1, which is undefined behaviour.
inline size_t bit_floor(size_t i) {
    // Bit width of size_t; assumes 8-bit bytes.
    constexpr int num_bits = sizeof(i) * 8;
    return size_t(1) << (num_bits - std::countl_zero(i) - 1);
}
// Returns the smallest power of two that is >= i, computed from the position
// of the highest set bit of (i - 1).
// i == 1 yields 1 (countl_zero(0) equals the bit width, so the shift is 0).
// Precondition: 1 <= i <= 2^(num_bits - 1). For i == 0 (i - 1 wraps to all
// ones) or for larger i, the shift count equals num_bits, which is undefined
// behaviour.
inline size_t bit_ceil(size_t i) {
    // Bit width of size_t; assumes 8-bit bytes.
    constexpr int num_bits = sizeof(i) * 8;
    return size_t(1) << (num_bits - std::countl_zero(i - 1));
}

// Lower-bound search over the random-access range [begin, end), which must be
// partitioned with respect to compare(element, value) (e.g. sorted).
//
// Returns an iterator to the first element for which compare(element, value)
// is false, or `end` if there is none.
//
// The search always probes with power-of-two steps:
//   1. `step` starts as the largest power of two <= length.
//   2. If length is not a power of two and begin[step] compares below `value`,
//      the answer lies after index `step`; the search window is then moved to
//      the last bit_ceil(remaining) elements of the range. That window may
//      overlap elements already known to compare below `value`, which is
//      harmless because the range is partitioned.
//   3. The loop halves `step` each iteration and advances `begin` only when
//      the probed element compares below `value`.
//   4. The final line adds 0 or 1 depending on the last comparison.
template<typename It, typename T, typename Cmp>
It branchless_lower_bound(It begin, It end, const T & value, Cmp && compare) {
    std::size_t length = end - begin;
    if (length == 0)
        return end;
    std::size_t step = bit_floor(length);
    if (step != length && compare(begin[step], value))
    {
        // Skip the first step + 1 elements: all of them compare below value.
        length -= step + 1;
        if (length == 0)
            return end;
        step = bit_ceil(length);
        // Re-anchor so the window of `step` elements ends exactly at `end`.
        begin = end - step;
    }
    for (step /= 2; step != 0; step /= 2)
    {
        if (compare(begin[step], value))
            begin += step;
    }
    // bool converts to 0 or 1: step past *begin if it still compares below value.
    return begin + compare(*begin, value);
}

// Convenience overload that orders elements with std::less<>.
template<typename It, typename T>
It branchless_lower_bound(It begin, It end, const T & value) {
    return branchless_lower_bound(begin, end, value, std::less<>{});
}

要比std::lower_bound快一点. TODO这里没有压测数据,后面补上

  • SWAR find any byte from set

介绍 https://github.com/ada-url/ada 这个库的一些技巧,SWAR 即 SIMD within a register

看代码

代码语言:javascript复制
// Returns the index of the first byte in `view` that equals '@', '/', '?' or
// a backslash, or view.size() if no such byte exists.
//
// The input is processed eight bytes at a time: each 8-byte chunk is loaded
// into a uint64_t, XOR-ed with the delimiter replicated into every byte (a
// matching byte becomes zero), and then tested for a zero byte.
//
// `ada_really_inline` and `swap_bytes_if_big_endian` are not defined in this
// snippet; presumably they come from the ada library.
ada_really_inline size_t find_authority_delimiter_special(std::string_view view) noexcept {
  // Non-zero iff some byte of v is zero; the lowest flagged byte (high bit
  // set) is the lowest zero byte of v.
  auto has_zero_byte = [](uint64_t v) {
    return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080);
  };
  // Given a word whose set bits are the high bits of some bytes, returns the
  // index (0 = least significant) of the lowest such byte.
  auto index_of_first_set_byte = [](uint64_t v) {
    return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1;
  };
  // Replicates the byte v into all eight bytes of a 64-bit word.
  auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
  size_t i = 0;
  uint64_t mask1 = broadcast('@');
  uint64_t mask2 = broadcast('/');
  uint64_t mask3 = broadcast('?');
  uint64_t mask4 = broadcast('\\');

  // Full 8-byte chunks.
  for (; i + 7 < view.size(); i += 8) {
    uint64_t word{};
    memcpy(&word, view.data() + i, sizeof(word));
    word = swap_bytes_if_big_endian(word);
    uint64_t xor1 = word ^ mask1;
    uint64_t xor2 = word ^ mask2;
    uint64_t xor3 = word ^ mask3;
    uint64_t xor4 = word ^ mask4;
    uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3) | has_zero_byte(xor4);
    if (is_match) {
      return i + index_of_first_set_byte(is_match);
    }
  }

  // Remaining 1..7 bytes: copied into a zero-initialised word, so the padding
  // bytes are 0 and cannot equal any of the four (non-zero) delimiters.
  if (i < view.size()) {
    uint64_t word{};
    memcpy(&word, view.data() + i, view.size() - i);
    word = swap_bytes_if_big_endian(word);
    uint64_t xor1 = word ^ mask1;
    uint64_t xor2 = word ^ mask2;
    uint64_t xor3 = word ^ mask3;
    uint64_t xor4 = word ^ mask4;
    uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3) | has_zero_byte(xor4);
    if (is_match) {
      return i + index_of_first_set_byte(is_match);
    }
  }

  return view.size();
}

简单来说就是把八个字节的信息编进一个8字节的寄存器里,用一次比较搞定,也就是SIMD within a register的含义了。这里还比较复杂,输入的是view,可能包含多个八字节,所以按八字节拆分处理了

这里考虑一下一般场景

代码语言:javascript复制
// Single-word version of the delimiter search.
//
// Treats `word` as eight bytes (byte 0 = least significant) and returns the
// index of the lowest byte equal to '@', '/', '?' or a backslash, or -1 when
// no byte matches.
int find_authority_delimiter_special_reference(uint64_t word) noexcept {
  // Non-zero iff some byte of v is zero; the lowest flagged byte (high bit
  // set) is the lowest zero byte of v.
  auto has_zero_byte = [](uint64_t v) {
    return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080);
  };
  // Given a word whose set bits are the high bits of some bytes, returns the
  // index of the lowest such byte.
  auto index_of_first_set_byte = [](uint64_t v) {
    return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1;
  };
  // Replicates the byte v into all eight bytes of a 64-bit word.
  auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
  uint64_t mask1 = broadcast('@');
  uint64_t mask2 = broadcast('/');
  uint64_t mask3 = broadcast('?');
  uint64_t mask4 = broadcast('\\');

  // XOR turns every byte equal to the delimiter into zero.
  uint64_t xor1 = word ^ mask1;
  uint64_t xor2 = word ^ mask2;
  uint64_t xor3 = word ^ mask3;
  uint64_t xor4 = word ^ mask4;
  uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3) | has_zero_byte(xor4);
  if (is_match) {
      return index_of_first_set_byte(is_match);
  }

  return -1;
}

作为网址相关解析的字符,这里完全可以极端一些,只考虑ascii码,这样范围又小了一些,更快了一些

然后作者手把手给你推导出这坨代码

代码语言:javascript复制
// ASCII-only variant of the single-word delimiter search.
//
// Treats `word` as eight bytes (byte 0 = least significant) and returns the
// index of the lowest byte equal to '@', '/', '?' or a backslash, or -1 when
// no byte matches. Bytes with the high bit set (non-ASCII) never match.
long int find_authority_delimiter_special_better(uint64_t word) noexcept {
  // Replicates the byte v into all eight bytes of a 64-bit word. The
  // multiplication is done in uint64_t so that broadcast(0x80) does not
  // overflow a signed 64-bit type.
  auto broadcast = [](uint8_t v) -> uint64_t { return uint64_t{0x0101010101010101} * v; };

  uint64_t mask = broadcast(0x7f);
  uint64_t lo7bits = word & mask;

  // Per byte: (7-bit value ^ delimiter) is zero only on a match; adding 0x7f
  // then sets bit 7 exactly when the byte did NOT match. Each byte is at most
  // 0x7f + 0x7f = 0xfe, so no carry crosses into the neighbouring byte.
  uint64_t x0 = (lo7bits ^ broadcast('@')) + mask;
  uint64_t x1 = (lo7bits ^ broadcast('/')) + mask;
  uint64_t x2 = (lo7bits ^ broadcast('?')) + mask;
  uint64_t x3 = (lo7bits ^ broadcast('\\')) + mask;

  // Bit 7 of a byte stays set when no delimiter matched it, or when the
  // original byte already had its high bit set (non-ASCII).
  uint64_t t0 = ((x0 & x1 & x2 & x3) | word);
  uint64_t t1 = t0 & broadcast(0x80);
  // Invert the per-byte flags: bit 7 is now set exactly for matching bytes.
  uint64_t t2 = t1 ^ broadcast(0x80);

  if (t2 != 0) {
    // The 64-bit builtin keeps this correct where `long` is 32 bits wide.
    return __builtin_ctzll(t2) / 8;
  }

  return -1;
}

把生成的汇编扔进 https://uica.uops.info/

能看出吞吐更快

这个推导逻辑我没有看懂。有看懂的哥们可以留言告诉我

  • SIMD-ized faster parse of IPv4 addresses

天书,彻底看不懂了

  • Futexes: a translation dictionary

介绍各种平台futex的行为/接口/表现。不多说

  • C++ Coroutines: Understanding the Compiler Transform

理解协程的代码转换。这个很多文章都有说过。还是值得一看的

  • Rust Enums in Modern C++
代码语言:javascript复制
// Enum with two variants, V4 and V6, each carrying a String payload.
enum IpAddr {
    V4(String),
    V6(String),
}

// Constructs the V4 variant holding the string "127.0.0.1".
let home = IpAddr::V4(String::from("127.0.0.1"));
代码语言:javascript复制
// Empty tag type used as the "no valid address" alternative.
struct InvalidIP {};

// Alternative holding an address as a string; distinct from IPv6 only by type.
struct IPv4 {
  std::string value;
};

// Alternative holding an address as a string; distinct from IPv4 only by type.
struct IPv6 {
  std::string value;
};

// Sum type over the three alternatives above.
using IPType = std::variant<InvalidIP, IPv4, IPv6>;


// NOTE: the statements below are a fragment; they belong inside a function body.
// The variant starts out holding the InvalidIP alternative.
IPType ipType = InvalidIP{};

// std::get_if yields a pointer to the stored value when the variant currently
// holds the requested alternative and a null pointer otherwise, so each
// branch runs only for its own alternative.
if (IPv4 *ip = std::get_if<IPv4>(&ipType)) {
    std::cout << ip->value << std::endl;
} else if (IPv6 *ip = std::get_if<IPv6>(&ipType)) {
    std::cout << ip->value << std::endl;
} else if (InvalidIP *ip = std::get_if<InvalidIP>(&ipType)) {
    // `ip` is only used for the null test here.
    std::cout << "Invalid IP" << std::endl;
}

简洁性还是差点意思

  • Variadic functions vs variadic templates

常识了, 直接贴代码了

代码语言:javascript复制
#include <cstdarg>
#include <iostream>

// C-style variadic printer.
//
// Reads `count` variadic arguments, each as an `int`, and writes them to
// std::cout separated by single spaces, followed by a newline.
//
// The caller is responsible for passing exactly `count` arguments of type
// int: va_arg has no way to check either the number or the types, and a
// mismatch is undefined behaviour.
void printV(size_t count, ...) {
    va_list args;
    va_start(args, count);
    for(size_t i = 0; i < count; ++i) {
      std::cout << va_arg(args, int);
      std::cout << " ";
    }
    std::cout << '\n';
    va_end(args);
}


// Base case: writes a single item followed by a space to std::cout.
template <typename T>
void printT(T item) {
  std::cout << item << ' ';
}

// Variadic-template printer: writes the first item, recurses on the remaining
// ones, then writes a newline. Unlike a C-style variadic function, every
// argument keeps its own type.
//
// Note: every level of the recursion that has two or more arguments ends with
// a newline, so a call with N arguments emits N - 1 newlines.
template <typename T, typename... Args>
void printT(T item, Args... args) {
  printT(item);
  printT(args...);
  std::cout << '\n';
}

// Calls both printers with the same kinds of argument lists to contrast them.
int main() {
  // NOTE(review): the first argument says 4 values follow, but only 3 are
  // passed; printV reads every value with va_arg(args, int), so reading a
  // fourth is undefined behaviour. Possibly deliberate as a pitfall demo - confirm.
  printV(4, 3, 2, 1);
  // NOTE(review): 8.2 and 1.1 are passed as double, but printV reads each
  // argument as int - also undefined behaviour.
  printV(3, 8.2, 2, 1.1);
  printV(5, 23, 32, 8, 11, 9);
  // printT deduces each argument's type, so mixed types are fine here.
  printT(3, 2, 1);
  printT(8.2, 2, 1.1, "duck");
  printT(23, 32, 8, 11, 9);
}

va_list很少见了

视频

  • C++ Weekly - Ep 374 - C++23’s out_ptr and inout_ptr

封装c api用的

开源项目需要人手

  • asteria 一个脚本语言,可嵌入,长期找人,希望胖友们帮帮忙,也可以加群384042845和作者对线
  • Unilang deepin的一个通用编程语言,点子有点意思,也缺人,感兴趣的可以github讨论区或者deepin论坛看一看。这里也挂着长期推荐了

新项目介绍/版本更新

  • Smaller & Faster Single-File Vector Search Engine

本文永久链接

0 人点赞