823e467078
- Introduced a batch script to simplify the startup process for the Resource Monitor Service. - Included checks for .NET 9.0 Runtime installation. - Added build and run commands for the service with appropriate error handling. - Provided user instructions and API documentation links in the script output.
302 lines
12 KiB
C#
302 lines
12 KiB
C#
using Microsoft.Extensions.Logging;
|
|
using Microsoft.Extensions.Options;
|
|
using ResourceMonitorService.Configuration;
|
|
using ResourceMonitorService.Models;
|
|
using System.Collections.Concurrent;
|
|
|
|
namespace ResourceMonitorService.Services
|
|
{
|
|
/// <summary>
/// Contract for evaluating resource-usage samples against alert rules and for
/// querying and resolving the alerts that result.
/// </summary>
public interface IAlertService
{
    /// <summary>Raised when a new alert is triggered.</summary>
    event EventHandler<Alert>? AlertTriggered;

    /// <summary>Raised when an alert is resolved.</summary>
    event EventHandler<Alert>? AlertResolved;

    /// <summary>Evaluates one resource-usage sample and raises or resolves alerts accordingly.</summary>
    /// <param name="resourceUsage">The sample to evaluate.</param>
    Task CheckAndGenerateAlertsAsync(ResourceUsage resourceUsage);

    /// <summary>Returns the alerts that are currently active.</summary>
    Task<List<Alert>> GetActiveAlertsAsync();

    /// <summary>Returns the most recent alerts from the history.</summary>
    /// <param name="count">Maximum number of alerts to return.</param>
    Task<List<Alert>> GetAlertHistoryAsync(int count = 100);

    /// <summary>Resolves the active alert with the given identifier.</summary>
    /// <param name="alertId">Identifier of the alert to resolve.</param>
    Task ResolveAlertAsync(string alertId);

    /// <summary>Reports whether alerting is enabled.</summary>
    Task<bool> IsAlertingEnabledAsync();
}
|
|
|
|
public class AlertService : IAlertService
|
|
{
|
|
/// <summary>Logger used for alert lifecycle messages and evaluation errors.</summary>
private readonly ILogger<AlertService> _logger;

/// <summary>Monitoring settings snapshot taken from the options at construction time.</summary>
private readonly MonitoringSettings _settings;

/// <summary>Alerts that have been triggered and not yet resolved, keyed by alert id.</summary>
private readonly ConcurrentDictionary<string, Alert> _activeAlerts = new();

/// <summary>Alerts in the order they were triggered.</summary>
private readonly ConcurrentQueue<Alert> _alertHistory = new();

/// <summary>Time the most recent alert was raised, keyed by "{component}_{level}".</summary>
private readonly Dictionary<string, DateTime> _lastAlertTime = new();

/// <summary>Time a threshold was first seen exceeded, keyed by "{component}_{level}".</summary>
private readonly Dictionary<string, DateTime> _thresholdExceededTime = new();

/// <inheritdoc />
public event EventHandler<Alert>? AlertTriggered;

/// <inheritdoc />
public event EventHandler<Alert>? AlertResolved;

/// <summary>
/// Creates the service with empty alert state.
/// </summary>
/// <param name="logger">Logger for this service.</param>
/// <param name="settings">Options wrapper whose current value supplies the monitoring settings.</param>
public AlertService(ILogger<AlertService> logger, IOptions<MonitoringSettings> settings)
{
    // The collection fields are created by their initializers; only the injected
    // dependencies need to be captured here.
    _logger = logger;
    _settings = settings.Value;
}
|
|
|
|
/// <summary>
/// Evaluates one resource-usage sample: checks CPU, memory, GPU, disk and the
/// largest process by memory against their thresholds, then resolves active
/// alerts whose condition no longer holds.
/// </summary>
/// <param name="resourceUsage">The sample to evaluate.</param>
/// <remarks>
/// Does nothing when alerts are disabled in the settings. The evaluation runs on
/// a thread-pool thread via <see cref="Task.Run(Action)"/>. Any exception thrown
/// during evaluation is logged and not propagated to the caller.
/// </remarks>
public async Task CheckAndGenerateAlertsAsync(ResourceUsage resourceUsage)
{
    if (!_settings.EnableAlerts)
    {
        return;
    }

    try
    {
        await Task.Run(() =>
        {
            // CPU usage
            CheckThreshold("CPU", resourceUsage.CPU.Usage, "CPU Usage", "%");

            // CPU temperature (only evaluated when the reading is positive)
            if (resourceUsage.CPU.Temperature > 0)
            {
                CheckThreshold("CPUTemp", resourceUsage.CPU.Temperature, "CPU Temperature", "°C");
            }

            // Memory usage
            CheckThreshold("Memory", resourceUsage.Memory.UsagePercentage, "Memory Usage", "%");

            // GPU usage and temperature
            if (resourceUsage.GPU.IsAvailable)
            {
                CheckThreshold("GPU", resourceUsage.GPU.Usage, "GPU Usage", "%");

                if (resourceUsage.GPU.Temperature > 0)
                {
                    CheckThreshold("GPUTemp", resourceUsage.GPU.Temperature, "GPU Temperature", "°C");
                }
            }

            // Per-disk usage and disk time
            foreach (var disk in resourceUsage.Disks)
            {
                CheckThreshold($"Disk_{disk.DriveLetter}", disk.UsagePercentage,
                    $"Disk Usage ({disk.DriveLetter})", "%");

                if (disk.DiskTime > 0)
                {
                    CheckThreshold($"DiskTime_{disk.DriveLetter}", disk.DiskTime,
                        $"Disk Time ({disk.DriveLetter})", "%");
                }
            }

            // Process using the most memory: warning at 4 GB, critical at 8 GB.
            var topMemoryProcess = resourceUsage.TopProcesses.MaxBy(p => p.MemoryUsage);

            if (topMemoryProcess != null)
            {
                var memoryUsageGB = topMemoryProcess.MemoryUsage / (1024.0 * 1024.0 * 1024.0);

                // Always evaluate, even below the warning level: CheckCustomAlert clears
                // its "exceeded since" tracking for readings under the threshold. Skipping
                // the call left a stale timestamp behind, so a later spike would alert
                // immediately instead of waiting for the sustained-duration window.
                CheckCustomAlert($"ProcessMemory_{topMemoryProcess.Name}",
                    (float)memoryUsageGB, 4f, 8f,
                    $"High Memory Usage - {topMemoryProcess.Name}", "GB");
            }

            // Resolve alerts whose triggering condition no longer holds.
            ResolveInactiveAlerts(resourceUsage);
        });
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Error checking and generating alerts");
    }
}
|
|
|
|
/// <summary>
/// Finds the configured threshold for <paramref name="component"/> (case-insensitive
/// match) and, when one exists and is enabled, evaluates the reading against it.
/// </summary>
/// <param name="component">Component key used to look up the threshold.</param>
/// <param name="currentValue">Current reading for the component.</param>
/// <param name="description">Human-readable name used in the alert message.</param>
/// <param name="unit">Unit suffix used in the alert message.</param>
private void CheckThreshold(string component, float currentValue, string description, string unit)
{
    var rule = _settings.AlertThresholds.FirstOrDefault(
        candidate => candidate.Component.Equals(component, StringComparison.OrdinalIgnoreCase));

    if (rule != null && rule.IsEnabled)
    {
        var requiredDuration = TimeSpan.FromSeconds(rule.DurationSeconds);

        CheckCustomAlert(component, currentValue, rule.WarningThreshold, rule.CriticalThreshold,
            description, unit, requiredDuration);
    }
}
|
|
|
|
/// <summary>
/// Evaluates a reading against warning and critical thresholds and raises an alert
/// once the threshold has stayed exceeded for the required duration.
/// </summary>
/// <param name="component">Component key; combined with the level to track state.</param>
/// <param name="currentValue">Current reading.</param>
/// <param name="warningThreshold">Reading at or above which the level is "Warning".</param>
/// <param name="criticalThreshold">Reading at or above which the level is "Critical".</param>
/// <param name="description">Human-readable name used in the alert message.</param>
/// <param name="unit">Unit suffix used in the alert message.</param>
/// <param name="duration">
/// How long the threshold must stay exceeded before alerting; defaults to 30 seconds.
/// </param>
/// <remarks>
/// The first reading over a threshold only starts the timer. Alerts for the same
/// component and level are raised at most once every 5 minutes. A reading below
/// both thresholds clears the timers for the component.
/// </remarks>
private void CheckCustomAlert(string component, float currentValue, float warningThreshold,
    float criticalThreshold, string description, string unit, TimeSpan? duration = null)
{
    var alertDuration = duration ?? TimeSpan.FromSeconds(30);
    var now = DateTime.Now;

    var warningKey = $"{component}_Warning";
    var criticalKey = $"{component}_Critical";

    string alertLevel;
    float thresholdValue;
    string key;

    if (currentValue >= criticalThreshold)
    {
        alertLevel = "Critical";
        thresholdValue = criticalThreshold;
        key = criticalKey;
    }
    else if (currentValue >= warningThreshold)
    {
        alertLevel = "Warning";
        thresholdValue = warningThreshold;
        key = warningKey;

        // The reading is no longer critical, so the critical timer must restart from
        // scratch next time; otherwise a stale timestamp lets a later critical spike
        // bypass the sustained-duration requirement.
        _thresholdExceededTime.Remove(criticalKey);
    }
    else
    {
        // Below both thresholds: stop tracking this component.
        _thresholdExceededTime.Remove(warningKey);
        _thresholdExceededTime.Remove(criticalKey);
        return;
    }

    // First reading over the threshold only starts the timer.
    if (!_thresholdExceededTime.TryGetValue(key, out var exceededSince))
    {
        _thresholdExceededTime[key] = now;
        return;
    }

    // Not exceeded for long enough yet.
    if (now - exceededSince < alertDuration)
    {
        return;
    }

    // Avoid spamming: at most one alert per component/level every 5 minutes.
    if (_lastAlertTime.TryGetValue(key, out var lastAlert) && now - lastAlert < TimeSpan.FromMinutes(5))
    {
        return;
    }

    var alert = new Alert
    {
        Timestamp = now,
        Component = component,
        Level = alertLevel,
        Message = $"{description} is {alertLevel.ToLowerInvariant()}: {currentValue:F1}{unit} (threshold: {thresholdValue:F1}{unit})",
        CurrentValue = currentValue,
        ThresholdValue = thresholdValue,
        IsResolved = false
    };

    var alertId = $"{component}_{alertLevel}_{now:yyyyMMddHHmmss}";
    _activeAlerts[alertId] = alert;
    _alertHistory.Enqueue(alert);
    _lastAlertTime[key] = now;

    // Keep the history bounded to the 1000 most recent alerts.
    while (_alertHistory.Count > 1000)
    {
        _alertHistory.TryDequeue(out _);
    }

    _logger.LogWarning("Alert triggered: {Message}", alert.Message);
    AlertTriggered?.Invoke(this, alert);
}
|
|
|
|
/// <summary>
/// Resolves active alerts whose triggering condition no longer holds in
/// <paramref name="resourceUsage"/>, and any alert older than one hour.
/// </summary>
/// <param name="resourceUsage">The latest resource-usage sample.</param>
/// <remarks>
/// CPU, memory, GPU, disk-usage ("Disk_") and disk-time ("DiskTime_") alerts are
/// resolved when the current reading is below the alert's threshold value. GPU
/// alerts are also resolved when the GPU is unavailable, and disk alerts when the
/// disk is no longer reported. Alerts for any other component are only resolved
/// by the one-hour age limit.
/// </remarks>
private void ResolveInactiveAlerts(ResourceUsage resourceUsage)
{
    const string DiskUsagePrefix = "Disk_";
    const string DiskTimePrefix = "DiskTime_";

    var now = DateTime.Now;
    var alertsToResolve = new List<string>();

    foreach (var (alertId, alert) in _activeAlerts)
    {
        var component = alert.Component;
        var threshold = alert.ThresholdValue;
        var shouldResolve = false;

        switch (component)
        {
            case "CPU":
                shouldResolve = resourceUsage.CPU.Usage < threshold;
                break;
            case "CPUTemp":
                shouldResolve = resourceUsage.CPU.Temperature < threshold;
                break;
            case "Memory":
                shouldResolve = resourceUsage.Memory.UsagePercentage < threshold;
                break;
            case "GPU":
                shouldResolve = !resourceUsage.GPU.IsAvailable || resourceUsage.GPU.Usage < threshold;
                break;
            case "GPUTemp":
                shouldResolve = !resourceUsage.GPU.IsAvailable || resourceUsage.GPU.Temperature < threshold;
                break;
            default:
            {
                // "DiskTime_X" does not start with "Disk_", so the two prefixes must be
                // tested separately; checking only "Disk_" left disk-time alerts to be
                // resolved solely by the age limit.
                var isDiskTime = component.StartsWith(DiskTimePrefix, StringComparison.Ordinal);
                var isDiskUsage = !isDiskTime && component.StartsWith(DiskUsagePrefix, StringComparison.Ordinal);

                if (isDiskTime || isDiskUsage)
                {
                    var prefixLength = isDiskTime ? DiskTimePrefix.Length : DiskUsagePrefix.Length;
                    var driveLetter = component[prefixLength..];

                    // The component key was built from the exact drive letter, so match
                    // it exactly rather than by substring.
                    var disk = resourceUsage.Disks.FirstOrDefault(
                        d => string.Equals(d.DriveLetter, driveLetter, StringComparison.Ordinal));

                    if (disk == null)
                    {
                        shouldResolve = true; // Disk no longer available
                    }
                    else
                    {
                        shouldResolve = isDiskTime
                            ? disk.DiskTime < threshold
                            : disk.UsagePercentage < threshold;
                    }
                }

                break;
            }
        }

        // Auto-resolve old alerts (older than 1 hour).
        if (now - alert.Timestamp > TimeSpan.FromHours(1))
        {
            shouldResolve = true;
        }

        if (shouldResolve)
        {
            alertsToResolve.Add(alertId);
        }
    }

    foreach (var alertId in alertsToResolve)
    {
        // TryRemove guards against the alert having been resolved elsewhere meanwhile.
        if (_activeAlerts.TryRemove(alertId, out var resolvedAlert))
        {
            resolvedAlert.IsResolved = true;
            resolvedAlert.ResolvedAt = now;

            _logger.LogInformation("Alert resolved: {Message}", resolvedAlert.Message);
            AlertResolved?.Invoke(this, resolvedAlert);
        }
    }
}
|
|
|
|
/// <summary>
/// Returns a list copy of the alerts that are currently active.
/// </summary>
/// <returns>A new list holding the active alerts at the time of the call.</returns>
public async Task<List<Alert>> GetActiveAlertsAsync()
{
    var activeSnapshot = _activeAlerts.Values.ToList();
    return await Task.FromResult(activeSnapshot);
}
|
|
|
|
/// <summary>
/// Returns the most recently recorded alerts from the history.
/// </summary>
/// <param name="count">Maximum number of alerts to return, taken from the end of the history.</param>
/// <returns>A new list with up to <paramref name="count"/> alerts, oldest first.</returns>
public async Task<List<Alert>> GetAlertHistoryAsync(int count = 100)
{
    var mostRecent = _alertHistory.TakeLast(count).ToList();
    return await Task.FromResult(mostRecent);
}
|
|
|
|
/// <summary>
/// Manually resolves the active alert with the given identifier.
/// </summary>
/// <param name="alertId">Key of the alert in the active-alert collection.</param>
/// <remarks>
/// When no active alert has this identifier nothing happens. Otherwise the alert is
/// removed from the active set, marked resolved, logged, and
/// <see cref="AlertResolved"/> is raised. The work runs on a thread-pool thread.
/// </remarks>
public async Task ResolveAlertAsync(string alertId)
{
    await Task.Run(() =>
    {
        var wasActive = _activeAlerts.TryRemove(alertId, out var removed);
        if (!wasActive)
        {
            return;
        }

        removed!.IsResolved = true;
        removed.ResolvedAt = DateTime.Now;

        _logger.LogInformation("Alert manually resolved: {Message}", removed.Message);
        AlertResolved?.Invoke(this, removed);
    });
}
|
|
|
|
/// <summary>
/// Reports whether alerting is enabled in the monitoring settings.
/// </summary>
/// <returns>The value of the settings' <c>EnableAlerts</c> flag.</returns>
public async Task<bool> IsAlertingEnabledAsync()
{
    var alertsEnabled = _settings.EnableAlerts;
    return await Task.FromResult(alertsEnabled);
}
|
|
}
|
|
}
|