maintenance-scripts/playbooks/analyze_disk_space.yml
rebecca 3574b47a5f Initialize infrastructure maintenance scripts with Ansible playbooks
Add Ansible-based maintenance scripts for infrastructure operations:
- CVE scanner using NIST NVD database
- Package update checker with OpenAI risk assessment
- Docker cleanup playbook
- Log archiver for rotated logs
- Disk space analyzer

Supports Ubuntu 20.04/22.04/24.04, Debian 11/12/13, and Alpine Linux
2026-01-22 10:37:08 -03:00


---
- name: Analyze Disk Space and Identify Large Directories
  hosts: all
  gather_facts: true
  vars:
    scan_paths:
      - "/"
      - "/var"
      - "/home"
      - "/opt"
      - "/usr"
      - "/tmp"
    max_depth: 5
    size_threshold_gb: 1
    output_file: "/tmp/disk_space_report_{{ ansible_date_time.iso8601_basic_short }}.json"
  tasks:
    - name: Get overall disk usage
      # -P (POSIX format) keeps long device names on a single line so the regex below always matches
      shell: df -hP
      register: df_output
      changed_when: false
    - name: Parse disk usage information
      set_fact:
        disk_usage: >-
          {{ df_output.stdout_lines[1:] |
             map('regex_replace',
                 '^([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)$',
                 '{"device": "\\1", "size": "\\2", "used": "\\3", "available": "\\4", "percent": "\\5", "mount": "\\6"}') |
             map('from_json') |
             list }}
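    # Each parsed entry is a flat mapping, e.g. (illustrative values):
    #   {"device": "/dev/sda1", "size": "50G", "used": "32G", "available": "18G", "percent": "64%", "mount": "/"}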
    - name: Find directories exceeding size threshold
      find:
        paths: "{{ item }}"
        file_type: directory
        recurse: false
      register: dir_list
      loop: "{{ scan_paths }}"
      failed_when: false
    - name: Analyze directory sizes for top-level paths
      shell: >-
        du -h -d{{ max_depth }} {{ item }} 2>/dev/null |
        grep -E '^[0-9]+\.?[0-9]*G' |
        awk '{print $1 "\t" $2}' |
        sort -hr
      register: dir_sizes
      loop: "{{ scan_paths }}"
      changed_when: false
      failed_when: false
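    # Each pipeline emits one "<size>G<TAB><path>" line per directory that du reports in
    # gigabytes, e.g. "2.4G\t/var/log" (illustrative); the next task parses these lines.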
    - name: Parse directory size results
      set_fact:
        large_directories: >-
          {{ large_directories | default([]) +
             [{'size_human': item.split('\t')[0],
               'size_gb_num': item.split('\t')[0] | regex_replace('G$', '') | float,
               'path': item.split('\t')[1]}] }}
      loop: >-
        {{ dir_sizes.results |
           selectattr('stdout', 'defined') |
           map(attribute='stdout_lines') |
           flatten |
           select('match', '^.+\t.+$') |
           list }}
      when: (item.split('\t')[0] | regex_replace('G$', '') | float) >= (size_threshold_gb | float)
      failed_when: false
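    # Note: the grep stage above only keeps G-suffixed lines, so directories that du
    # reports in M or K never reach this parser; size_threshold_gb therefore only
    # tightens the selection upward from roughly 1 GiB.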
    - name: Convert human-readable sizes to bytes
      set_fact:
        large_directories_parsed: >-
          {{ large_directories_parsed | default([]) +
             [item | combine({'size_bytes': (item.size_gb_num | float * 1024 * 1024 * 1024) | int})] }}
      loop: "{{ large_directories | default([]) }}"
    - name: Find files larger than threshold
      find:
        paths: "{{ item }}"
        size: "{{ (size_threshold_gb * 1024 * 1024 * 1024) | int }}"
        recurse: true
      register: large_files
      loop: "{{ scan_paths }}"
      failed_when: false
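    # The find module reads an unqualified size value as bytes, hence the explicit
    # conversion above; an equivalent spelling would be size: "{{ size_threshold_gb }}g".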
    - name: Parse large file information
      set_fact:
        large_files_info: >-
          {{ large_files_info | default([]) +
             [{'path': item.path,
               'size_bytes': item.size | default(0),
               'size_human': item.size | default(0) | human_readable}] }}
      loop: >-
        {{ large_files.results | default([]) |
           selectattr('files', 'defined') |
           map(attribute='files') |
           flatten |
           list }}
      loop_control:
        label: "{{ item.path }}"
      failed_when: false
    - name: Get inode usage
      shell: df -iP
      register: df_inode_output
      changed_when: false
    - name: Parse inode usage information
      set_fact:
        inode_usage: >-
          {{ inode_usage | default([]) +
             [item | combine({'inodes_percent_num': item.inodes_percent | regex_replace('%', '') | int})] }}
      loop: >-
        {{ df_inode_output.stdout_lines[1:] |
           map('regex_replace', '^([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)$', '{"device": "\\1", "inodes_total": "\\2", "inodes_used": "\\3", "inodes_free": "\\4", "inodes_percent": "\\5", "mount": "\\6"}') |
           map('from_json') |
           list }}
    - name: Generate disk space report
      copy:
        dest: "{{ output_file }}"
        content: >-
          {
            "hostname": "{{ ansible_hostname }}",
            "ip_address": "{{ ansible_default_ipv4.address | default('') }}",
            "os": "{{ ansible_distribution }} {{ ansible_distribution_version }}",
            "analysis_date": "{{ ansible_date_time.iso8601 }}",
            "disk_usage": {{ disk_usage | to_json }},
            "inode_usage": {{ inode_usage | default([]) | to_json }},
            "scan_parameters": {
              "paths": {{ scan_paths | to_json }},
              "max_depth": {{ max_depth }},
              "size_threshold_gb": {{ size_threshold_gb }},
              "size_threshold_bytes": {{ (size_threshold_gb * 1024 * 1024 * 1024) | int }}
            },
            "large_directories": {
              "count": {{ large_directories_parsed | default([]) | length }},
              "threshold_gb": {{ size_threshold_gb }},
              "directories": {{ large_directories_parsed | default([]) | to_json }}
            },
            "large_files": {
              "count": {{ large_files_info | default([]) | length }},
              "threshold_gb": {{ size_threshold_gb }},
              "files": {{ large_files_info | default([]) | to_json }}
            },
            "summary": {
              "total_large_directories": {{ large_directories_parsed | default([]) | length }},
              "total_large_files": {{ large_files_info | default([]) | length }},
              "disk_alerts": {{ (disk_usage | selectattr('percent', 'search', '^[89][0-9]%|^100%$') | list | length > 0) | to_json }},
              "inode_alerts": {{ (inode_usage | default([]) | selectattr('inodes_percent_num', 'ge', 90) | list | length > 0) | to_json }}
            }
          }
        mode: '0600'
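    # The report is plain JSON, so it can be inspected on the target afterwards, e.g.
    # (assuming jq is installed; the timestamped filename differs per run):
    #   jq '.summary' /tmp/disk_space_report_<timestamp>.json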
    - name: Display disk space summary
      debug:
        msg:
          - "Disk space analysis completed on {{ ansible_hostname }}"
          - "Large directories found: {{ large_directories_parsed | default([]) | length }}"
          - "Large files found: {{ large_files_info | default([]) | length }}"
          - "Disk usage alerts: {{ disk_usage | selectattr('percent', 'search', '^[89][0-9]%|^100%$') | list | length > 0 }}"
          - "Inode usage alerts: {{ inode_usage | default([]) | selectattr('inodes_percent_num', 'ge', 90) | list | length > 0 }}"
          - "Report saved to: {{ output_file }}"
    - name: Display top 5 largest directories
      debug:
        msg: "{{ item.size_human }}\t{{ item.path }}"
      loop: "{{ (large_directories_parsed | default([]) | sort(attribute='size_gb_num', reverse=true))[:5] }}"
      when: large_directories_parsed | default([]) | length > 0
    - name: Return disk space findings
      set_fact:
        disk_space_report:
          hostname: "{{ ansible_hostname }}"
          ip_address: "{{ ansible_default_ipv4.address | default('') }}"
          os: "{{ ansible_distribution }} {{ ansible_distribution_version }}"
          disk_usage: "{{ disk_usage }}"
          inode_usage: "{{ inode_usage | default([]) }}"
          large_directories: "{{ large_directories_parsed | default([]) }}"
          large_files: "{{ large_files_info | default([]) }}"
          summary:
            total_large_directories: "{{ large_directories_parsed | default([]) | length }}"
            total_large_files: "{{ large_files_info | default([]) | length }}"
            disk_alerts: "{{ disk_usage | selectattr('percent', 'search', '^[89][0-9]%|^100%$') | list | length > 0 }}"
            inode_alerts: "{{ inode_usage | default([]) | selectattr('inodes_percent_num', 'ge', 90) | list | length > 0 }}"
          analysis_date: "{{ ansible_date_time.iso8601 }}"
          report_file: "{{ output_file }}"
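
# A later play in the same run could aggregate the per-host fact set above, e.g.
# (sketch, assuming the hosts match this play's inventory group):
#
#   - name: Collect disk space summaries
#     hosts: localhost
#     gather_facts: false
#     tasks:
#       - name: Show each host's summary
#         debug:
#           msg: "{{ hostvars[item].disk_space_report.summary }}"
#         loop: "{{ groups['all'] }}"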