常量折叠instcombine pass优化实例
下面函数foo中存在冗余变量计算、赋值语句,使用instcombine优化的效果:
#include <stdio.h>
/*
 * Builds a sum out of compile-time constants and prints it.
 *
 * Every operand is a literal, so the whole computation can be folded by the
 * compiler: x = 600, y = 400, and a = 200 + 600 + 400 + 200 = 1400.
 * z is deliberately left unused; it is one of the redundant assignments the
 * example wants the optimizer to remove.
 *
 * Returns 0 unconditionally.
 */
int foo()
{
    int x = 500 + 100;
    int y = 200 * 2;
    int z = 300;                          /* redundant: never read */
    int a = 200 + x + y + (100 * (1 + 1));
    printf("Sum: %d\n", a);
    return 0;
}
/* Program entry point: calls foo() once, discards its result, and returns 0. */
int main()
{
foo();
return 0;
}
clang -emit-llvm -S hello.c -o hello.ll
注意:IR代码带optnone标记,后续无法优化
; Function Attrs: noinline nounwind optnone uwtable
; Unoptimized (-O0) body of foo(): each local lives in its own stack slot and
; the sum is rebuilt through loads and adds.
define dso_local i32 @foo() #0 {
  %1 = alloca i32, align 4                ; slot receiving 600
  %2 = alloca i32, align 4                ; slot receiving 400
  %3 = alloca i32, align 4                ; slot receiving 300 (never loaded)
  %4 = alloca i32, align 4                ; slot receiving the final sum
  store i32 600, ptr %1, align 4
  store i32 400, ptr %2, align 4
  store i32 300, ptr %3, align 4
  %5 = load i32, ptr %1, align 4
  %6 = add nsw i32 200, %5
  %7 = load i32, ptr %2, align 4
  %8 = add nsw i32 %6, %7
  %9 = add nsw i32 %8, 200
  store i32 %9, ptr %4, align 4
  ; Unnamed values must be numbered consecutively, so the two results after %9
  ; are %10 and %11.
  %10 = load i32, ptr %4, align 4
  %11 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %10)
  ret i32 0
}
需要使用这条命令编译,生成不带optnone的IR
clang -emit-llvm -Xclang -disable-O0-optnone -S hello.c -o hello.ll
; ModuleID = 'hello.c'
source_filename = "hello.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; printf format string "Sum: %d\n" plus the NUL terminator: 7 + 1 + 1 = 9 bytes.
@.str = private unnamed_addr constant [9 x i8] c"Sum: %d\0A\00", align 1
; Function Attrs: noinline nounwind uwtable
; Unoptimized body of foo(), generated without the optnone attribute so that
; later passes are allowed to transform it. Trailing comments map the IR back
; to the C statements.
define dso_local i32 @foo() #0 {
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  %3 = alloca i32, align 4
  %4 = alloca i32, align 4
  store i32 600, ptr %1, align 4          ; int x = 500 + 100;
  store i32 400, ptr %2, align 4          ; int y = 200 * 2;
  store i32 300, ptr %3, align 4          ; int z = 300;
  %5 = load i32, ptr %1, align 4
  %6 = add nsw i32 200, %5                ; int a = 200 + x ...
  %7 = load i32, ptr %2, align 4
  %8 = add nsw i32 %6, %7                 ;         ... + y ...
  %9 = add nsw i32 %8, 200                ;         ... + (100 * (1 + 1));
  store i32 %9, ptr %4, align 4
  ; Unnamed values must be numbered consecutively, so the two results after %9
  ; are %10 and %11.
  %10 = load i32, ptr %4, align 4
  %11 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %10)
  ret i32 0
}
; Declaration only (no body in this module) of the variadic function printf.
declare i32 @printf(ptr noundef, ...) #1
; Function Attrs: noinline nounwind uwtable
; main: calls @foo, ignores its result, and returns 0.
define dso_local i32 @main() #0 {
; NOTE(review): %1 is stored to but never loaded; presumably clang's
; return-value slot at -O0 - confirm.
%1 = alloca i32, align 4
store i32 0, ptr %1, align 4
%2 = call i32 @foo()
ret i32 0
}
; Attribute group #0 is attached to the @foo and @main definitions, #1 to the
; @printf declaration. Each entry in "target-features" carries a '+' prefix
; (feature enabled) and entries are comma-separated without spaces.
attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
; Module-level metadata: module flags (!0-!4) and the producing compiler (!5).
!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{i32 7, !"frame-pointer", i32 2}
!5 = !{!"clang version 16.0.6 (https://github.com/llvm/llvm-project.git 7cbf1a2591520c2491aa35339f227775f4d3adf6)"}
优化后
opt -passes=instcombine -S hello.ll -o hellonew.ll
; ModuleID = 'hello.ll'
source_filename = "hello.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; printf format string "Sum: %d\n" plus the NUL terminator: 7 + 1 + 1 = 9 bytes.
@.str = private unnamed_addr constant [9 x i8] c"Sum: %d\0A\00", align 1
; Function Attrs: noinline nounwind uwtable
; foo() after instcombine: no alloca/store/load/add instructions remain, and
; printf receives the folded constant 1400 (200 + 600 + 400 + 200) directly.
define dso_local i32 @foo() #0 {
%1 = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str, i32 noundef 1400) #2
ret i32 0
}
; Declaration only (no body in this module) of the variadic function printf.
declare i32 @printf(ptr noundef, ...) #1
; Function Attrs: noinline nounwind uwtable
; main after instcombine: only the call to @foo and the return remain.
define dso_local i32 @main() #0 {
%1 = call i32 @foo()
ret i32 0
}
; Attribute group #0 is attached to the @foo and @main definitions, #1 to the
; @printf declaration, and #2 to the printf call site inside @foo. Each entry
; in "target-features" carries a '+' prefix (feature enabled) and entries are
; comma-separated without spaces.
attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { nounwind }
; Module-level metadata: module flags (!0-!4) and the producing compiler (!5).
!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 8, !"PIC Level", i32 2}
!2 = !{i32 7, !"PIE Level", i32 2}
!3 = !{i32 7, !"uwtable", i32 2}
!4 = !{i32 7, !"frame-pointer", i32 2}
!5 = !{!"clang version 16.0.6 (https://github.com/llvm/llvm-project.git 7cbf1a2591520c2491aa35339f227775f4d3adf6)"}
常量全部折叠了:
; Function Attrs: noinline nounwind uwtable
; The whole body of foo() is reduced to one printf call whose integer argument
; is the constant 1400.
define dso_local i32 @foo() #0 {
%1 = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str, i32 noundef 1400) #2
ret i32 0
}