Browse Source
import os import re common_words = set([ 'about', 'after', 'all', 'also', 'an', 'and', 'any', 'are', 'as', 'at', 'be', 'because', 'but', 'by', 'can', 'come', 'could', 'day', 'do', 'even', 'first', 'for', 'get', 'give', 'go', 'has', 'have', 'he', 'her', 'him', 'his', 'how', 'I', 'in', 'into', 'it', 'its', 'just', 'know', 'like', 'look', 'make', 'man', 'many', 'me', 'more', 'my', 'new', 'no', 'not', 'now', 'of', 'one', 'only', 'or', 'other', 'our', 'out', 'over', 'people', 'say', 'see', 'she', 'so', 'some', 'take', 'tell', 'than', 'their', 'them', 'then', 'there', 'these', 'they', 'think', 'this', 'time', 'two', 'up', 'use', 'very', 'want', 'was', 'way', 'we', 'well', 'what', 'when', 'which', 'who', 'will', 'with', 'would', 'year', 'you', 'your' ]) valid_extensions = set([ 'c', 'h', 'yaml', 'cmake', 'conf', 'txt', 'overlay', 'rst', 'dtsi', 'Kconfig', 'dts', 'defconfig', 'yml', 'ld', 'sh', 'py', 'soc', 'cfg' ]) def filter_repeated_words(text): # Split the text into lines lines = text.split('\n') # Combine lines into a single string with unique separator combined_text = '/*sep*/'.join(lines) # Replace repeated words within a line def replace_within_line(match): return match.group(1) # Regex for matching repeated words within a line within_line_pattern = re.compile(r'\b(' + '|'.join(map(re.escape, common_words)) + r')\b\s+\b\1\b') combined_text = within_line_pattern. sub(replace_within_line, combined_text) # Replace repeated words across line boundaries def replace_across_lines(match): return match.group(1) + match.group(2) # Regex for matching repeated words across line boundaries across_lines_pattern = re. compile(r'\b(' + '|'.join( map(re.escape, common_words)) + r')\b(\s*[*\/\n\s]*)\b\1\b') combined_text = across_lines_pattern. sub(replace_across_lines, combined_text) # Split the text back into lines filtered_text = combined_text.split('/*sep*/') return '\n'.join(filtered_text) def process_file(file_path): with open(file_path, 'r', encoding='utf-8') as file: text = file.read() new_text = filter_repeated_words(text) with open(file_path, 'w', encoding='utf-8') as file: file.write(new_text) def process_directory(directory_path): for root, dirs, files in os.walk(directory_path): dirs[:] = [d for d in dirs if not d.startswith('.')] for file in files: # Filter out hidden files if file.startswith('.'): continue file_extension = file.split('.')[-1] if file_extension in valid_extensions: # 只处理指定后缀的文件 file_path = os.path.join(root, file) print(f"Processed file: {file_path}") process_file(file_path) directory_to_process = "/home/mi/works/github/zephyrproject/zephyr" process_directory(directory_to_process) Signed-off-by: Lingao Meng <menglingao@xiaomi.com>pull/74948/head
59 changed files with 68 additions and 68 deletions
Loading…
Reference in new issue