File Analysis Toolkit

  • Share this:

Code introduction


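This code defines a set of functions that analyze the files under a specified directory (including its subdirectories): listing file paths, counting how often each file extension occurs, counting the lines in each file, sorting the files by line count in descending order, and generating a JSON report.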


Technology Stack : os, re, sys, time, json

Code Type : Code analysis

Code Difficulty : Intermediate


                
                    
import os
import re
import sys
import time
import json

def list_files(directory, recursive=True):
    """
    List the paths of the files found under a directory.

    The tree is traversed with ``os.walk``. By default every subdirectory is
    descended into, so files at any depth are returned. Pass
    ``recursive=False`` to get only the files located directly inside
    ``directory``.

    :param directory: path of the directory to scan
    :param recursive: when True (the default) files in subdirectories are
        included; when False only the top-level files are returned
    :return: list of file paths, each one joined with the directory it was
        found in (so every path starts with ``directory``)
    """
    file_list = []
    for root, _dirs, files in os.walk(directory):
        file_list.extend(os.path.join(root, name) for name in files)
        if not recursive:
            # os.walk is top-down by default, so the first tuple it yields
            # describes ``directory`` itself; stop before visiting children.
            break
    return file_list

def extract_extensions(file_list):
    """
    Count how many times each file extension occurs in a list of files.

    The extension is whatever ``os.path.splitext`` reports as the suffix
    (including the leading dot); names without a suffix are counted under
    the empty string.

    :param file_list: list of file names or paths
    :return: dictionary mapping each extension to its number of occurrences
    """
    counts = {}
    for path in file_list:
        _, suffix = os.path.splitext(path)
        counts[suffix] = counts.get(suffix, 0) + 1
    return counts

def count_lines_in_files(file_list):
    """
    Count the number of lines in every file of a list.

    Files are read in text mode with ``errors="replace"``: bytes that cannot
    be decoded with the default encoding are substituted instead of raising
    ``UnicodeDecodeError``, so a single binary or differently-encoded file
    does not abort the whole analysis. Line splitting is unaffected by the
    substitution.

    :param file_list: list of file paths
    :return: dictionary mapping each file path to its line count
    :raises OSError: if a file cannot be opened
    """
    line_counts = {}
    for path in file_list:
        with open(path, 'r', errors='replace') as handle:
            # Iterate lazily so large files are never loaded in full.
            line_counts[path] = sum(1 for _ in handle)
    return line_counts

def sort_files_by_line_count(file_list, line_counts):
    """
    Order files from the most lines to the fewest.

    The input list is left untouched; a new list is returned. Files with the
    same line count keep their relative order from ``file_list``.

    :param file_list: list of file paths
    :param line_counts: dictionary mapping each file path to its line count
    :return: new list of the file paths sorted by line count, descending
    """
    def _lines_of(path):
        return line_counts[path]

    ordered = list(file_list)
    ordered.sort(key=_lines_of, reverse=True)
    return ordered

def generate_report(sorted_files, extension_dict, line_counts):
    """
    Build a JSON report combining the file list and both statistics.

    The report is an object with three keys, in this order: ``files``,
    ``extensions`` and ``line_counts``. It is serialized with a two-space
    indent.

    :param sorted_files: list of file paths sorted by line count
    :param extension_dict: dictionary mapping extensions to occurrence counts
    :param line_counts: dictionary mapping file paths to line counts
    :return: the report as a JSON string
    """
    payload = dict(
        files=sorted_files,
        extensions=extension_dict,
        line_counts=line_counts,
    )
    serialized = json.dumps(payload, indent=2)
    return serialized

def analyze_directory(directory):
    """
    Run the full analysis pipeline on a directory and return the report.

    The steps are: collect the file list, count extensions, count lines per
    file, sort the files by line count, and serialize everything as JSON.

    :param directory: path of the directory to analyze
    :return: the generated report as a JSON string
    """
    paths = list_files(directory)
    extension_totals = extract_extensions(paths)
    lines_per_file = count_lines_in_files(paths)
    return generate_report(
        sort_files_by_line_count(paths, lines_per_file),
        extension_totals,
        lines_per_file,
    )

# Example usage: pass the directory to analyze as the first command-line
# argument. Without an argument the placeholder path below is used.
if __name__ == "__main__":
    if len(sys.argv) > 1:
        directory_to_analyze = sys.argv[1]
    else:
        directory_to_analyze = "/path/to/directory"
    report = analyze_directory(directory_to_analyze)
    print(report)
              
Tags: