将一个大文件按照每个文件最多 split_lines 行,分割成多个小文件。以下实现逐行读取,内存占用低:
def split_file_by_line(file_name, split_lines):
    """Split a text file into part files of at most ``split_lines`` lines each.

    The input is read line by line, so only one line is held in memory at a
    time. Part files are written next to the input and are named
    ``<file_name>_part_<N>`` with ``N`` starting at 1. Existing part files
    with the same name are overwritten.

    Args:
        file_name: Path of the UTF-8 text file to split.
        split_lines: Maximum number of lines per part file. A value below 1
            behaves like 1 (every line goes to its own part file).

    Returns:
        The list of part file paths, in the order they were created. An
        empty input file yields an empty list and creates no part files.
    """
    split_files = []
    file_idx = 1
    line_ct = 0
    fout = None  # currently open part file, or None when between parts
    try:
        with open(file_name, 'r', encoding='utf-8') as fin:
            for line in fin:
                if fout is None:
                    # Open a new part lazily, only when there is a line to
                    # put in it, so no empty trailing part is ever created.
                    part_file = file_name + '_part_' + str(file_idx)
                    fout = open(part_file, 'w', encoding='utf-8')
                    split_files.append(part_file)
                fout.write(line)
                line_ct += 1
                if line_ct >= split_lines:
                    # Current part is full: close it and move to the next.
                    fout.close()
                    fout = None
                    line_ct = 0
                    file_idx += 1
    finally:
        # The last part is usually only partially filled and therefore still
        # open here; close it (also on error) so its data is flushed.
        if fout is not None:
            fout.close()
    print(f'file: {file_name}, split lines: {split_lines}, split files num: {len(split_files)}')
    return split_files
测试
$ cat test
1
2
3
4
$
test 文件有 5 行
>>> split_file_by_line('test', 2)
['test_part_1', 'test_part_2', 'test_part_3']
>>> split_file_by_line('test', 3)
['test_part_1', 'test_part_2']
>>> split_file_by_line('test', 1)
['test_part_1', 'test_part_2', 'test_part_3', 'test_part_4', 'test_part_5']
>>> split_file_by_line('test', 4)
['test_part_1', 'test_part_2']
>>> split_file_by_line('test', 5)
['test_part_1']
>>> split_file_by_line('test', 6)
['test_part_1']
>>> split_file_by_line('test', 7)
['test_part_1']