LLVM(4)常量折叠instcombine pass优化实例

2023-10-13 10:29:13 浏览数 (2)

常量折叠instcombine pass优化实例

下面的函数 foo 中存在冗余的变量计算和赋值语句,本文演示使用 instcombine pass 对其进行优化的效果:

代码语言:javascript复制
#include <stdio.h>

/*
 * Demo function for the constant-folding example.
 *
 * Every operand below is a compile-time constant, so the whole computation
 * can be folded down to the single value 1400. The local `z` is deliberately
 * unused: it is part of the redundant code the optimizer is expected to
 * remove.
 *
 * Prints "Sum: 1400" followed by a newline and returns 0.
 */
int foo()
{
  int x = 500 + 100;                     /* 600 */
  int y = 200 * 2;                       /* 400 */
  int z = 300;                           /* intentionally unused */
  int a = 200 + x + y + (100 * (1 + 1)); /* 200 + 600 + 400 + 200 = 1400 */
  printf("Sum: %d\n", a);
  return 0;
}

/* Program entry point: runs the demo function once, then exits with 0. */
int main()
{
  (void)foo(); /* the value returned by foo is not used */
  return 0;
}

clang -emit-llvm -S hello.c -o hello.ll

注意:默认以 -O0 编译时,clang 会给函数加上 optnone 属性(见下方的 Function Attrs),带有该属性的函数会被 opt 的优化 pass 跳过,因此后续无法优化:

代码语言:javascript复制
; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @foo() #0 {
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  %3 = alloca i32, align 4
  %4 = alloca i32, align 4
  store i32 600, ptr %1, align 4
  store i32 400, ptr %2, align 4
  store i32 300, ptr %3, align 4
  %5 = load i32, ptr %1, align 4
  %6 = add nsw i32 200, %5
  %7 = load i32, ptr %2, align 4
  %8 = add nsw i32 %6, %7
  %9 = add nsw i32 %8, 200
  store i32 %9, ptr %4, align 4
  %10 = load i32, ptr %4, align 4
  %11 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %10)
  ret i32 0
}

需要使用这条命令编译,生成不带optnone的IR

clang -emit-llvm -Xclang -disable-O0-optnone -S hello.c -o hello.ll

代码语言:javascript复制
; ModuleID = 'hello.c'
source_filename = "hello.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = private unnamed_addr constant [9 x i8] c"Sum: %d\0A\00", align 1

; Function Attrs: noinline nounwind uwtable
define dso_local i32 @foo() #0 {
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  %3 = alloca i32, align 4
  %4 = alloca i32, align 4
  store i32 600, ptr %1, align 4        ; *****  int x = 500 + 100;
  store i32 400, ptr %2, align 4        ; *****  int y = 200 * 2;
  store i32 300, ptr %3, align 4        ; *****  int z = 300;
  %5 = load i32, ptr %1, align 4
  %6 = add nsw i32 200, %5              ; ***** int a = 200 + ...
  %7 = load i32, ptr %2, align 4
  %8 = add nsw i32 %6, %7
  %9 = add nsw i32 %8, 200              ; ***** int a = ... + (100 * (1 + 1));
  store i32 %9, ptr %4, align 4
  %10 = load i32, ptr %4, align 4
  %11 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %10)
  ret i32 0
}

declare i32 @printf(ptr noundef, ...) #1

; Function Attrs: noinline nounwind uwtable
define dso_local i32 @main() #0 {
  %1 = alloca i32, align 4
  store i32 0, ptr %1, align 4
  %2 = call i32 @foo()
  ret i32 0
}

attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{i32 7, !"frame-pointer", i32 2}
!5 = !{!"clang version 16.0.6 (https://github.com/llvm/llvm-project.git 7cbf1a2591520c2491aa35339f227775f4d3adf6)"}

优化后

opt -passes=instcombine -S hello.ll -o hellonew.ll

代码语言:javascript复制
; ModuleID = 'hello.ll'
source_filename = "hello.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = private unnamed_addr constant [9 x i8] c"Sum: %d\0A\00", align 1

; Function Attrs: noinline nounwind uwtable
define dso_local i32 @foo() #0 {
  %1 = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str, i32 noundef 1400) #2
  ret i32 0
}

declare i32 @printf(ptr noundef, ...) #1

; Function Attrs: noinline nounwind uwtable
define dso_local i32 @main() #0 {
  %1 = call i32 @foo()
  ret i32 0
}

attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { nounwind }

!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{i32 7, !"frame-pointer", i32 2}
!5 = !{!"clang version 16.0.6 (https://github.com/llvm/llvm-project.git 7cbf1a2591520c2491aa35339f227775f4d3adf6)"}

常量全部折叠了:

代码语言:javascript复制
; Function Attrs: noinline nounwind uwtable
define dso_local i32 @foo() #0 {
  %1 = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str, i32 noundef 1400) #2
  ret i32 0
}

0 人点赞