C++ 中文周刊 第112期
弄了个qq频道,手机qq点击进入
RSS https://github.com/wanghenshui/cppweeklynews/releases.atom
欢迎投稿,推荐或自荐文章/软件/资源等
请提交 issue
不想上班,本周内容极少。有点不想发了
资讯
标准委员会动态/ide/编译器信息放在这里
编译器信息最新动态推荐关注hellogcc公众号 本周更新 200期
200期,不容易
文章
- Did you know that C++23 extended floating-point types?
#include <stdfloat>
// Declares one variable of each floating-point alias provided by <stdfloat>,
// each initialized from a literal that carries the matching suffix.
int main() {
std::float16_t f16 = .42f16;
// bfloat16 uses its own suffix (bf16), distinct from float16's f16.
std::bfloat16_t bf16 = .42bf16;
std::float32_t f32 = .42f32;
std::float64_t f64 = .42f64;
std::float128_t f128 = .42f128;
}
更多的浮点类型
- Beautiful Branchless Binary Search 直接贴代码吧
#include <stdint.h>
#include <bit>
#include <functional>
// Returns a value with a single bit set, at the position of the highest set
// bit of `i` -- i.e. the largest power of two that is <= i.
// Precondition: i != 0. For i == 0 std::countl_zero returns the full bit
// width, which makes the shift amount -1.
inline size_t bit_floor(size_t i) {
// Bit width of size_t, computed assuming 8-bit bytes.
constexpr int num_bits = sizeof(i) * 8;
return size_t(1) << (num_bits - std::countl_zero(i) - 1);
}
// Returns the smallest power of two that is >= i (i == 1 yields 1, because
// countl_zero(0) is the full bit width and the shift becomes 0).
// Precondition: i != 0. For i == 0, `i - 1` wraps to the maximum value and
// the shift amount becomes the full bit width of size_t.
inline size_t bit_ceil(size_t i) {
// Bit width of size_t, computed assuming 8-bit bytes.
constexpr int num_bits = sizeof(i) * 8;
return size_t(1) << (num_bits - std::countl_zero(i - 1));
}
/// Lower-bound search over [begin, end) that probes with a fixed sequence of
/// power-of-two steps instead of recomputing a midpoint each iteration.
///
/// @param begin   Random-access iterator to the first element.
/// @param end     Iterator one past the last element.
/// @param value   Value being searched for.
/// @param compare Callable invoked as `compare(element, value)`; the range is
///                assumed to be partitioned with respect to it (the same
///                precondition std::lower_bound has).
/// @return Iterator to the first element for which `compare(element, value)`
///         is false, or `end` if there is none (including the empty range).
template<typename It, typename T, typename Cmp>
It branchless_lower_bound(It begin, It end, const T & value, Cmp && compare) {
    std::size_t length = end - begin;
    if (length == 0)
        return end;
    // Largest power of two not exceeding the length.
    std::size_t step = bit_floor(length);
    if (step != length && compare(begin[step], value))
    {
        // The answer lies strictly after begin[step]: drop the first
        // step + 1 elements and continue in a power-of-two sized window
        // that is aligned to the END of the range.
        length -= step + 1;
        if (length == 0)
            return end;
        step = bit_ceil(length);
        begin = end - step;
    }
    // Halve the step each round; `begin` only ever moves forward.
    for (step /= 2; step != 0; step /= 2)
    {
        if (compare(begin[step], value))
            begin += step;
    }
    // compare() yields 0 or 1: advance one last element if *begin still
    // orders before value.
    return begin + compare(*begin, value);
}
/// Convenience overload of branchless_lower_bound that orders elements with
/// the transparent `std::less<>` comparator.
template<typename It, typename T>
It branchless_lower_bound(It begin, It end, const T & value) {
    std::less<> ascending;
    return branchless_lower_bound(begin, end, value, ascending);
}
要比 std::lower_bound 快一点。TODO: 这里没有压测数据,后面补上
- SWAR find any byte from set
介绍 https://github.com/ada-url/ada 这个库的一些技巧,即 SWAR(SIMD within a register)
看代码
代码语言:javascript复制ada_really_inline size_t find_authority_delimiter_special(std::string_view view) noexcept {
auto has_zero_byte = [](uint64_t v) {
return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080);
};
auto index_of_first_set_byte = [](uint64_t v) {
return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1;
};
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
size_t i = 0;
uint64_t mask1 = broadcast('@');
uint64_t mask2 = broadcast('/');
uint64_t mask3 = broadcast('?');
uint64_t mask4 = broadcast('\');
for (; i 7 < view.size(); i = 8) {
uint64_t word{};
memcpy(&word, view.data() i, sizeof(word));
word = swap_bytes_if_big_endian(word);
uint64_t xor1 = word ^ mask1;
uint64_t xor2 = word ^ mask2;
uint64_t xor3 = word ^ mask3;
uint64_t xor4 = word ^ mask4;
uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3) | has_zero_byte(xor4);
if (is_match) {
return i index_of_first_set_byte(is_match);
}
}
if (i < view.size()) {
uint64_t word{};
memcpy(&word, view.data() i, view.size() - i);
word = swap_bytes_if_big_endian(word);
uint64_t xor1 = word ^ mask1;
uint64_t xor2 = word ^ mask2;
uint64_t xor3 = word ^ mask3;
uint64_t xor4 = word ^ mask4;
uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3) | has_zero_byte(xor4);
if (is_match) {
return i index_of_first_set_byte(is_match);
}
}
return view.size();
}
简单来说就是用一次比较搞定八个字节,把信息编进一个 8 字节的整数里,这也就是 SIMD within a register(SWAR)的含义。这里还比较复杂,因为输入是 string_view,长度不定,需要按八字节拆分处理
这里考虑一下一般场景
/// Single-word version of the delimiter search.
///
/// @param word Eight input bytes packed into one integer; byte 0 is the least
///             significant byte.
/// @return Index (0-7) of the lowest byte equal to '@', '/', '?' or '\\',
///         or -1 when no byte matches.
int find_authority_delimiter_special_reference(uint64_t word) noexcept {
    // Sets bit 7 of every byte of v that is zero. Bytes above a zero byte may
    // be flagged too (borrow propagation); only the lowest flagged byte is
    // consumed, and that one is always a genuine zero byte.
    auto has_zero_byte = [](uint64_t v) {
        return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080);
    };
    // Byte index of the lowest flagged byte: (v - 1) has bit 0 set in every
    // byte up to and including that one; the multiply sums those bits into
    // the top byte.
    auto index_of_first_set_byte = [](uint64_t v) {
        return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1;
    };
    // Replicates v into all eight bytes of a 64-bit word.
    auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
    uint64_t mask1 = broadcast('@');
    uint64_t mask2 = broadcast('/');
    uint64_t mask3 = broadcast('?');
    uint64_t mask4 = broadcast('\\');
    // XOR turns a byte equal to the delimiter into a zero byte.
    uint64_t xor1 = word ^ mask1;
    uint64_t xor2 = word ^ mask2;
    uint64_t xor3 = word ^ mask3;
    uint64_t xor4 = word ^ mask4;
    uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3) | has_zero_byte(xor4);
    if (is_match) {
        // The index is in 0..7, so the narrowing is lossless.
        return static_cast<int>(index_of_first_set_byte(is_match));
    }
    return -1;
}
作为网址相关解析的字符,这里完全可以极端一些,只考虑ascii码,这样范围又小了一些,更快了一些
然后作者手把手给你推导出这坨代码
/// Single-word delimiter search that only does ASCII-range arithmetic.
///
/// @param word Eight input bytes packed into one integer; byte 0 is the least
///             significant byte.
/// @return Index (0-7) of the lowest byte equal to '@', '/', '?' or '\\',
///         or -1 when no byte matches. Bytes with the high bit set never
///         match.
long int find_authority_delimiter_special_better(uint64_t word) noexcept {
    // Replicates v into all eight bytes. The constant is unsigned so that
    // broadcast(0x80) does not overflow a signed multiplication.
    auto broadcast = [](uint8_t v) -> uint64_t { return uint64_t{0x0101010101010101} * v; };
    const uint64_t mask = broadcast(0x7f);
    // Low seven bits of every byte.
    const uint64_t lo7bits = word & mask;
    // Per byte: the XOR is 0 iff the low seven bits equal the delimiter.
    // Adding 0x7f then sets bit 7 iff the XOR was non-zero; each byte is at
    // most 0x7f + 0x7f = 0xfe, so nothing carries into the next byte.
    const uint64_t x0 = (lo7bits ^ broadcast('@')) + mask;
    const uint64_t x1 = (lo7bits ^ broadcast('/')) + mask;
    const uint64_t x2 = (lo7bits ^ broadcast('?')) + mask;
    const uint64_t x3 = (lo7bits ^ broadcast('\\')) + mask;
    // After the AND, bit 7 is set iff the byte differs from all four
    // delimiters. OR-ing in `word` also sets it for bytes whose own high bit
    // is set, so e.g. 0xC0 is not mistaken for '@' (0x40).
    const uint64_t t0 = ((x0 & x1 & x2 & x3) | word);
    // Keep bit 7 of each byte only, then invert it: bit 7 is now set exactly
    // in the bytes that matched a delimiter.
    const uint64_t t1 = t0 & broadcast(0x80);
    const uint64_t t2 = t1 ^ broadcast(0x80);
    if (t2 != 0) {
        // ctzll takes unsigned long long, which is at least 64 bits wide
        // everywhere; unsigned long is only 32 bits on some ABIs.
        return __builtin_ctzll(t2) / 8;
    }
    return -1;
}
把生成的汇编扔进 https://uica.uops.info/
能看出吞吐更快
这个推导逻辑我没有看懂。有看懂的哥们可以留言告诉我
- SIMD-ized faster parse of IPv4 addresses
天书,彻底看不懂了
- Futexes: a translation dictionary
介绍各种平台futex的行为/接口/表现。不多说
- C++ Coroutines: Understanding the Compiler Transform
理解协程的代码转换。这个很多文章都有说过。还是值得一看的
- Rust Enums in Modern C++
// Enum with two variants; each variant carries a String payload.
enum IpAddr {
V4(String),
V6(String),
}
// Builds the V4 variant from a string literal.
let home = IpAddr::V4(String::from("127.0.0.1"));
/// Tag type for "no valid address". It is the first alternative of IPType,
/// so a default-constructed IPType holds it.
struct InvalidIP {};

/// IPv4 address kept in textual form.
struct IPv4 {
    std::string value;
};

/// IPv6 address kept in textual form.
struct IPv6 {
    std::string value;
};

/// Closed set of address kinds, playing the role of the Rust enum.
using IPType = std::variant<InvalidIP, IPv4, IPv6>;
// Start out holding the InvalidIP alternative.
IPType ipType = InvalidIP{};
// std::get_if yields a pointer to the held alternative, or nullptr when the
// variant currently holds a different one, so at most one branch below runs.
if (IPv4 *ip = std::get_if<IPv4>(&ipType)) {
std::cout << ip->value << std::endl;
} else if (IPv6 *ip = std::get_if<IPv6>(&ipType)) {
std::cout << ip->value << std::endl;
} else if (InvalidIP *ip = std::get_if<InvalidIP>(&ipType)) {
// `ip` is not dereferenced here; only the non-null check matters.
std::cout << "Invalid IP" << std::endl;
}
简洁性还是差点意思
- Variadic functions vs variadic templates
常识了, 直接贴代码了
#include <cstdarg>
#include <iostream>
/// Prints `count` variadic arguments to std::cout, each followed by a space,
/// then a single newline.
///
/// @param count Number of variadic arguments that follow. Every one of them
///              is read as `int`; the function has no way to verify either
///              the count or the argument types.
void printV(size_t count, ...) {
    va_list args;
    va_start(args, count);
    for (size_t i = 0; i < count; ++i) {
        std::cout << va_arg(args, int);
        std::cout << " ";
    }
    std::cout << '\n';
    va_end(args);
}
/// Base case: streams a single item to std::cout followed by a space.
template <typename T>
void printT(T item) {
    std::cout << item << ' ';
}

/// Recursive case: prints the first item, recurses on the remaining ones,
/// then emits a newline.
///
/// NOTE(review): the newline is written once per recursion level, so a call
/// with N arguments ends with N - 1 newlines -- confirm that is intended.
template <typename T, typename... Args>
void printT(T item, Args... args) {
    printT(item);
    printT(args...);
    std::cout << '\n';
}
// Calls the C-style variadic printer and the variadic-template printer with
// comparable argument lists.
int main() {
// NOTE(review): count is 4 but only three values follow, so printV performs
// one more va_arg read than there are arguments -- presumably a deliberate
// illustration of the pitfall; confirm.
printV(4, 3, 2, 1);
// NOTE(review): 8.2 and 1.1 are doubles, while printV reads every argument
// with va_arg(args, int) -- presumably deliberate as well; confirm.
printV(3, 8.2, 2, 1.1);
printV(5, 23, 32, 8, 11, 9);
// The template version deduces each argument's type, so ints, doubles and a
// string literal can be mixed in one call.
printT(3, 2, 1);
printT(8.2, 2, 1.1, "duck");
printT(23, 32, 8, 11, 9);
}
va_list很少见了
视频
- C++ Weekly - Ep 374 - C++23’s out_ptr and inout_ptr
封装c api用的
开源项目需要人手
- asteria 一个脚本语言,可嵌入,长期找人,希望胖友们帮帮忙,也可以加群384042845和作者对线
- Unilang deepin的一个通用编程语言,点子有点意思,也缺人,感兴趣的可以github讨论区或者deepin论坛看一看。这里也挂着长期推荐了
新项目介绍/版本更新
- Smaller & Faster Single-File Vector Search Engine
本文永久链接