---
- name: Analyze Disk Space and Identify Large Directories
  hosts: all
  gather_facts: true

  vars:
    scan_paths:
      - "/"
      - "/var"
      - "/home"
      - "/opt"
      - "/usr"
      - "/tmp"
    max_depth: 5
    size_threshold_gb: 1
    output_file: "/tmp/disk_space_report_{{ ansible_date_time.iso8601_basic_short }}.json"

  tasks:
    - name: Get overall disk usage
      # -P (POSIX mode) keeps each filesystem on a single line so the regex below parses reliably
      shell: df -hP
      register: df_output
      changed_when: false

    - name: Parse disk usage information
      set_fact:
        disk_usage: >-
          {{ df_output.stdout_lines[1:]
             | map('regex_replace',
                   '^(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(.+)$',
                   '{"device": "\\1", "size": "\\2", "used": "\\3", "available": "\\4", "percent": "\\5", "mount": "\\6"}')
             | map('from_json')
             | list }}

    - name: List top-level directories in scan paths
      # Registered for reference only; the size analysis below is driven by du
      find:
        paths: "{{ item }}"
        file_type: directory
        recurse: false
      register: dir_list
      loop: "{{ scan_paths }}"
      failed_when: false

    - name: Analyze directory sizes for top-level paths
      # du already emits "size<TAB>path"; filtering with grep alone (no awk)
      # avoids truncating paths that contain spaces
      shell: >-
        du -h -d {{ max_depth }} {{ item }} 2>/dev/null
        | grep -E '^[0-9]+(\.[0-9]+)?G[[:space:]]'
        | sort -hr
      register: dir_sizes
      loop: "{{ scan_paths }}"
      changed_when: false
      failed_when: false

    - name: Parse directory size results
      # Each matching line is "size<TAB>path"; build one dict per line and
      # keep only entries at or above the threshold
      set_fact:
        large_directories: >-
          {{ large_directories | default([]) + [{
               'size_human': item.split('\t') | first,
               'size_gb_num': item.split('\t') | first | regex_replace('G$', '') | float,
               'path': item.split('\t') | last
             }] }}
      loop: >-
        {{ dir_sizes.results
           | selectattr('stdout', 'defined')
           | map(attribute='stdout')
           | map('split', '\n')
           | flatten
           | select('match', '^[0-9]+(\.[0-9]+)?G\t')
           | list }}
      when: (item.split('\t') | first | regex_replace('G$', '') | float) >= size_threshold_gb

    - name: Convert directory sizes to bytes
      set_fact:
        large_directories_parsed: >-
          {{ large_directories_parsed | default([])
             + [item | combine({'size_bytes': (item.size_gb_num * 1024 * 1024 * 1024) | int})] }}
      loop: "{{ large_directories | default([]) }}"

    - name: Find files larger than threshold
      # A positive size selects files of at least that many bytes
      find:
        paths: "{{ item }}"
        size: "{{ (size_threshold_gb * 1024 * 1024 * 1024) | int }}"
        recurse: true
      register: large_files
      loop: "{{ scan_paths }}"
      failed_when: false

    - name: Parse large file information
      set_fact:
        large_files_info: >-
          {{ large_files_info | default([]) + [{
               'path': item.path,
               'size_bytes': item.size,
               'size_human': item.size | human_readable
             }] }}
      loop: >-
        {{ large_files.results
           | selectattr('files', 'defined')
           | map(attribute='files')
           | flatten
           | list }}
      loop_control:
        label: "{{ item.path }}"

    - name: Get inode usage
      shell: df -iP
      register: df_inode_output
      changed_when: false

    - name: Parse inode usage information
      set_fact:
        inode_usage: >-
          {{ inode_usage | default([])
             + [item | combine({'inodes_percent_num': item.inodes_percent | regex_replace('%', '') | int})] }}
      loop: >-
        {{ df_inode_output.stdout_lines[1:]
           | map('regex_replace',
                 '^(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(.+)$',
                 '{"device": "\\1", "inodes_total": "\\2", "inodes_used": "\\3", "inodes_free": "\\4", "inodes_percent": "\\5", "mount": "\\6"}')
           | map('from_json')
           | list }}
      loop_control:
        label: "{{ item.mount }}"

    - name: Generate disk space report
      copy:
        dest: "{{ output_file }}"
        content: |
          {
            "hostname": "{{ ansible_hostname }}",
            "ip_address": "{{ ansible_default_ipv4.address | default('unknown') }}",
            "os": "{{ ansible_distribution }} {{ ansible_distribution_version }}",
            "analysis_date": "{{ ansible_date_time.iso8601 }}",
            "disk_usage": {{ disk_usage | to_json }},
            "inode_usage": {{ inode_usage | default([]) | to_json }},
            "scan_parameters": {
              "paths": {{ scan_paths | to_json }},
              "max_depth": {{ max_depth }},
              "size_threshold_gb": {{ size_threshold_gb }},
              "size_threshold_bytes": {{ (size_threshold_gb * 1024 * 1024 * 1024) | int }}
            },
            "large_directories": {
              "count": {{ large_directories_parsed | default([]) | length }},
              "threshold_gb": {{ size_threshold_gb }},
              "directories": {{ large_directories_parsed | default([]) | to_json }}
            },
            "large_files": {
              "count": {{ large_files_info | default([]) | length }},
              "threshold_gb": {{ size_threshold_gb }},
              "files": {{ large_files_info | default([]) | to_json }}
            },
            "summary": {
              "total_large_directories": {{ large_directories_parsed | default([]) | length }},
              "total_large_files": {{ large_files_info | default([]) | length }},
              "disk_alerts": {{ (disk_usage | selectattr('percent', 'search', '^([89][0-9]|100)%$') | list | length > 0) | to_json }},
              "inode_alerts": {{ (inode_usage | default([]) | selectattr('inodes_percent_num', 'ge', 90) | list | length > 0) | to_json }}
            }
          }
        mode: '0600'

    - name: Display disk space summary
      debug:
        msg:
          - "Disk space analysis completed on {{ ansible_hostname }}"
          - "Large directories found: {{ large_directories_parsed | default([]) | length }}"
          - "Large files found: {{ large_files_info | default([]) | length }}"
          - "Disk usage alerts: {{ disk_usage | selectattr('percent', 'search', '^([89][0-9]|100)%$') | list | length > 0 }}"
          - "Inode usage alerts: {{ inode_usage | default([]) | selectattr('inodes_percent_num', 'ge', 90) | list | length > 0 }}"
          - "Report saved to: {{ output_file }}"

    - name: Display top 5 largest directories
      debug:
        msg: "{{ item.size_human }}\t{{ item.path }}"
      loop: >-
        {{ (large_directories_parsed | default([])
            | sort(attribute='size_gb_num', reverse=true))[:5] }}
      when: large_directories_parsed | default([]) | length > 0

    - name: Return disk space findings
      set_fact:
        disk_space_report:
          hostname: "{{ ansible_hostname }}"
          ip_address: "{{ ansible_default_ipv4.address | default('unknown') }}"
          os: "{{ ansible_distribution }} {{ ansible_distribution_version }}"
          disk_usage: "{{ disk_usage }}"
          inode_usage: "{{ inode_usage | default([]) }}"
          large_directories: "{{ large_directories_parsed | default([]) }}"
          large_files: "{{ large_files_info | default([]) }}"
          summary:
            total_large_directories: "{{ large_directories_parsed | default([]) | length }}"
            total_large_files: "{{ large_files_info | default([]) | length }}"
            disk_alerts: "{{ disk_usage | selectattr('percent', 'search', '^([89][0-9]|100)%$') | list | length > 0 }}"
            inode_alerts: "{{ inode_usage | default([]) | selectattr('inodes_percent_num', 'ge', 90) | list | length > 0 }}"
          analysis_date: "{{ ansible_date_time.iso8601 }}"
          report_file: "{{ output_file }}"