# You can download this code by clicking the button below.
# This code is now available for download.
# This code defines a series of functions that analyze the files in a specified directory: listing files, extracting extensions, counting lines, sorting by line count, and generating a report.
# Technology Stack: os, re, sys, time, json
# Code Type: Code analysis
# Code Difficulty: Intermediate
import json
import os
import re
import sys
import time
from collections import Counter
def list_files(directory):
    """
    Recursively collect the paths of all files under a directory.

    The tree is traversed with ``os.walk``, so files inside subdirectories
    are included. Each entry is the walk root joined with the file name,
    i.e. a path that starts with ``directory`` rather than a bare name.
    Directories and file names are visited in sorted order so the result
    is the same on every run.

    :param directory: path of the directory to scan
    :return: list of file paths; empty when no files are found (``os.walk``
        ignores listing errors by default, so a missing directory also
        produces an empty list)
    """
    file_list = []
    for root, dirs, files in os.walk(directory):
        # In top-down mode os.walk descends into ``dirs`` in its current
        # order, so sorting it in place fixes the traversal order.
        dirs.sort()
        file_list.extend(os.path.join(root, name) for name in sorted(files))
    return file_list
def extract_extensions(file_list):
    """
    Count how many files carry each file extension.

    The extension is the second element returned by ``os.path.splitext``:
    it includes the leading dot (e.g. ``".py"``) and is the empty string
    for names that have no extension. Extensions are compared exactly as
    written, so ``".TXT"`` and ``".txt"`` are counted separately.

    :param file_list: iterable of file names or paths
    :return: plain dict mapping each extension to its number of
        occurrences, with keys in first-seen order
    """
    # Counter does the tallying; convert back to a plain dict so callers
    # keep receiving exactly the type they got before.
    return dict(Counter(os.path.splitext(path)[1] for path in file_list))
def count_lines_in_files(file_list):
    """
    Count the number of lines in every file of a list.

    Files are read in text mode, so ``\\n``, ``\\r`` and ``\\r\\n`` all end a
    line. Bytes that are not valid UTF-8 are replaced instead of raising,
    which lets binary or differently encoded files be counted rather than
    aborting the whole run with ``UnicodeDecodeError``.

    :param file_list: iterable of file paths
    :return: dict mapping each file path to its line count
    :raises OSError: if a file cannot be opened or read
    """
    line_counts = {}
    for path in file_list:
        # errors="replace" only affects undecodable bytes; line breaks are
        # ASCII and are therefore counted the same way as before.
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            line_counts[path] = sum(1 for _ in f)
    return line_counts
def sort_files_by_line_count(file_list, line_counts):
    """
    Order files from the most lines to the fewest.

    The input list is left untouched; a new list is returned. Files with
    equal line counts keep the relative order they had in ``file_list``.

    :param file_list: iterable of file paths to order
    :param line_counts: mapping from file path to line count; it must
        contain every path in ``file_list``
    :return: new list of the same paths in descending line-count order
    """
    ordered = list(file_list)
    # Look each path up directly in the mapping to obtain its sort key.
    ordered.sort(key=line_counts.__getitem__, reverse=True)
    return ordered
def generate_report(sorted_files, extension_dict, line_counts):
    """
    Serialize the analysis results as a JSON document.

    The document is an object with three keys, in this order: ``files``,
    ``extensions`` and ``line_counts``. It is pretty-printed with an
    indent of two spaces.

    :param sorted_files: file paths, already ordered by line count
    :param extension_dict: mapping from extension to occurrence count
    :param line_counts: mapping from file path to line count
    :return: the report as a JSON string
    """
    payload = dict(
        files=sorted_files,
        extensions=extension_dict,
        line_counts=line_counts,
    )
    return json.dumps(payload, indent=2)
def analyze_directory(directory):
    """
    Run the full analysis for a directory and return the report.

    Lists the files, tallies their extensions, counts their lines, orders
    the files by line count and hands all three results to
    ``generate_report``.

    :param directory: path of the directory to analyze
    :return: the report as a JSON string
    """
    paths = list_files(directory)
    extension_totals = extract_extensions(paths)
    lines_per_file = count_lines_in_files(paths)
    return generate_report(
        sort_files_by_line_count(paths, lines_per_file),
        extension_totals,
        lines_per_file,
    )
# Example usage: run the script with an optional directory argument.
if __name__ == "__main__":
    # Use the first command-line argument as the directory when one is
    # given; otherwise fall back to the placeholder path.
    directory_to_analyze = sys.argv[1] if len(sys.argv) > 1 else "/path/to/directory"
    report = analyze_directory(directory_to_analyze)
    print(report)