Update GAT.py
This commit is contained in:
@@ -636,15 +636,15 @@ def remove_duplicates(input_file, output_file):
|
|||||||
if genre_line:
|
if genre_line:
|
||||||
output_lines.append(line)
|
output_lines.append(line)
|
||||||
# 将结果写入输出文件
|
# 将结果写入输出文件
|
||||||
with open(output_file, 'w', encoding='utf-8') as f:
|
with open(output_file, 'a', encoding='utf-8') as f:
|
||||||
f.writelines(output_lines)
|
f.writelines(output_lines)
|
||||||
print("去重后的行数:", len(output_lines))
|
print("去重后的行数:", len(output_lines))
|
||||||
|
|
||||||
# 使用方法
|
# 使用方法
|
||||||
remove_duplicates('去重.txt', 'gat.txt')
|
remove_duplicates('去重.txt', '网络收集.txt')
|
||||||
|
|
||||||
# 打开文档并读取所有行
|
# 打开文档并读取所有行
|
||||||
with open('gat.txt', 'r', encoding="utf-8") as file:
|
with open('网络收集.txt', 'r', encoding="utf-8") as file:
|
||||||
lines = file.readlines()
|
lines = file.readlines()
|
||||||
|
|
||||||
# 使用列表来存储唯一的行的顺序
|
# 使用列表来存储唯一的行的顺序
|
||||||
@@ -658,7 +658,7 @@ for line in lines:
|
|||||||
seen_lines.add(line)
|
seen_lines.add(line)
|
||||||
|
|
||||||
# 将唯一的行写入新的文档
|
# 将唯一的行写入新的文档
|
||||||
with open('gat.txt', 'w', encoding="utf-8") as file:
|
with open('网络收集.txt', 'w', encoding="utf-8") as file:
|
||||||
file.writelines(unique_lines)
|
file.writelines(unique_lines)
|
||||||
|
|
||||||
|
|
||||||
@@ -701,7 +701,7 @@ def append_text_between_files(file1_path, file2_path):
|
|||||||
combined_lines = unique_lines2 + unique_lines1
|
combined_lines = unique_lines2 + unique_lines1
|
||||||
with open(file2_path, 'w', encoding='utf-8') as file2:
|
with open(file2_path, 'w', encoding='utf-8') as file2:
|
||||||
file2.write('\n'.join(combined_lines))
|
file2.write('\n'.join(combined_lines))
|
||||||
file_path1 = 'gat.txt'
|
file_path1 = '网络收集.txt'
|
||||||
file_path2 = '综合源.txt'
|
file_path2 = '综合源.txt'
|
||||||
append_text_between_files(file_path1, file_path2)
|
append_text_between_files(file_path1, file_path2)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user