实现 LightOps 运维面板基础功能

This commit is contained in:
2026-05-25 01:13:03 +08:00
commit d3bb9f45a6
84 changed files with 23505 additions and 0 deletions

View File

@@ -0,0 +1,26 @@
[package]
name = "lightops-agent"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
anyhow.workspace = true
async-trait.workspace = true
base64.workspace = true
chrono.workspace = true
clap.workspace = true
dashmap.workspace = true
futures-util.workspace = true
lightops-common = { path = "../lightops-common" }
portable-pty.workspace = true
serde.workspace = true
serde_json.workspace = true
sysinfo.workspace = true
tokio.workspace = true
tokio-tungstenite.workspace = true
toml.workspace = true
tracing.workspace = true
tracing-subscriber.workspace = true
url.workspace = true
uuid.workspace = true

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,59 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::{fs, path::Path};
use url::Url;
/// Agent settings that are read from and written back to a TOML file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentConfig {
/// Base URL of the control server; `ws_url` derives the WebSocket endpoint from it.
pub server_url: String,
/// Identifier assigned to this agent, if one has been stored.
pub agent_id: Option<String>,
/// Token sent in the hello message, if one is configured.
pub token: Option<String>,
/// Secret sent in the hello message, if one has been stored.
pub secret: Option<String>,
/// Optional display name for this agent.
pub name: Option<String>,
/// Optional heartbeat period. NOTE(review): presumably seconds — confirm against the caller.
pub heartbeat_interval: Option<u64>,
/// Path the configuration is associated with; never serialized (`serde(skip)`).
#[serde(skip)]
pub config_path: Option<String>,
}
impl Default for AgentConfig {
    /// Builds the configuration used when no config file exists: a local
    /// server URL, a 30 unit heartbeat interval and no identity fields.
    fn default() -> Self {
        let server_url = String::from("http://127.0.0.1:8080");
        let heartbeat_interval = Some(30);
        Self {
            server_url,
            heartbeat_interval,
            agent_id: None,
            token: None,
            secret: None,
            name: None,
            config_path: None,
        }
    }
}
impl AgentConfig {
pub fn load_optional(path: &str) -> Result<Self> {
if Path::new(path).exists() {
Ok(toml::from_str(&fs::read_to_string(path)?)?)
} else {
Ok(Self::default())
}
}
pub fn save(&self, path: &str) -> Result<()> {
if let Some(parent) = Path::new(path).parent() {
fs::create_dir_all(parent)?;
}
fs::write(path, toml::to_string_pretty(self)?)?;
Ok(())
}
pub fn ws_url(&self) -> Result<String> {
let mut url = Url::parse(&self.server_url)?;
url.set_scheme(match url.scheme() {
"https" => "wss",
_ => "ws",
})
.ok();
url.set_path("/api/agent/ws");
Ok(url.to_string())
}
}

View File

@@ -0,0 +1,343 @@
mod actions;
mod app;
mod config;
mod system_info;
mod terminal;
use anyhow::{Context, Result};
use clap::Parser;
use config::AgentConfig;
use dashmap::DashMap;
use futures_util::{SinkExt, StreamExt};
use lightops_common::protocol::{AgentCapabilities, AgentMessage, ServerMessage};
use std::{
sync::Arc,
time::{Duration, SystemTime, UNIX_EPOCH},
};
use tokio::sync::mpsc;
use tokio_tungstenite::{connect_async, tungstenite::Message};
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
/// Seconds allowed for establishing the WebSocket connection in `run_once`.
const CONNECT_TIMEOUT_SECS: u64 = 15;
/// Seconds to wait for the server's first message after the hello was sent.
const HANDSHAKE_TIMEOUT_SECS: u64 = 15;
/// Seconds of read silence after which `run_once` treats the connection as dead.
const READ_GRACE_SECS: u64 = 100;
/// Upper bound, in seconds, for the doubling reconnect backoff in `run_forever`.
const MAX_RECONNECT_BACKOFF_SECS: u64 = 60;
// Command-line options of the agent. Plain `//` comments are used on purpose:
// clap's derive turns `///` doc comments into help text.
#[derive(Debug, Parser)]
struct Args {
// Overrides `server_url` from the config file.
#[arg(long)]
server: Option<String>,
// Overrides `token` from the config file.
#[arg(long)]
token: Option<String>,
// Path of the config file; `default_config_path()` is used when absent.
#[arg(long)]
config: Option<String>,
// Overrides `name` from the config file.
#[arg(long)]
name: Option<String>,
}
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::registry()
.with(
tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| "info".into()),
)
.with(tracing_subscriber::fmt::layer())
.init();
let args = Args::parse();
let config_path = args.config.clone().unwrap_or_else(|| default_config_path());
let mut cfg = AgentConfig::load_optional(&config_path)?;
if let Some(server) = args.server {
cfg.server_url = server;
}
if let Some(token) = args.token {
cfg.token = Some(token);
}
if let Some(name) = args.name {
cfg.name = Some(name);
}
cfg.config_path = Some(config_path);
run_forever(cfg).await
}
/// Returns the config file location used when `--config` is not given:
/// `agent.toml` on Windows, `/etc/lightops/agent.toml` elsewhere.
fn default_config_path() -> String {
    let path = if cfg!(windows) {
        "agent.toml"
    } else {
        "/etc/lightops/agent.toml"
    };
    path.to_owned()
}
/// Keeps the agent connected forever.
///
/// After a session that ended without error the (possibly updated)
/// configuration is kept and the backoff is reset to one second. After a
/// failed session the current backoff is used and then doubled, capped at
/// `MAX_RECONNECT_BACKOFF_SECS`.
async fn run_forever(mut cfg: AgentConfig) -> Result<()> {
    let mut backoff: u64 = 1;
    loop {
        let wait = match run_once(cfg.clone()).await {
            Ok(updated) => {
                cfg = updated;
                tracing::warn!("Agent 连接已断开,准备重连");
                backoff = 1;
                reconnect_delay(1)
            }
            Err(err) => {
                tracing::warn!(?err, backoff, "Agent 连接失败,等待后重试");
                let delay = reconnect_delay(backoff);
                backoff = (backoff * 2).min(MAX_RECONNECT_BACKOFF_SECS);
                delay
            }
        };
        tokio::time::sleep(wait).await;
    }
}
/// Runs one WebSocket session against the control server.
///
/// Connects, sends the hello message, waits for the acceptance reply
/// (persisting a newly issued agent id/secret), then pumps server messages
/// until the connection closes.
///
/// Returns the possibly updated configuration when the server closes the
/// connection cleanly.
///
/// # Errors
/// Fails on connect/handshake timeouts, a rejected or unexpected handshake,
/// an undecodable server message, a read error, or when nothing is received
/// for `READ_GRACE_SECS`.
async fn run_once(mut cfg: AgentConfig) -> Result<AgentConfig> {
    let ws_url = cfg.ws_url()?;
    tracing::info!("正在连接主控端 {}", ws_url);
    let (ws, _) = tokio::time::timeout(
        Duration::from_secs(CONNECT_TIMEOUT_SECS),
        connect_async(ws_url),
    )
    .await
    .context("连接主控端超时")?
    .context("连接主控端 WebSocket 失败")?;
    let (mut write, mut read) = ws.split();
    let (tx, mut rx) = mpsc::unbounded_channel::<AgentMessage>();
    let streams = Arc::new(DashMap::new());

    let hello = AgentMessage::AgentHello {
        agent_id: cfg.agent_id.clone(),
        token: cfg.token.clone(),
        secret: cfg.secret.clone(),
        hostname: hostname(),
        os: std::env::consts::OS.to_string(),
        arch: std::env::consts::ARCH.to_string(),
        version: env!("CARGO_PKG_VERSION").to_string(),
        capabilities: AgentCapabilities::default(),
    };
    write
        .send(Message::Text(serde_json::to_string(&hello)?))
        .await?;

    let Some(Ok(Message::Text(first))) =
        tokio::time::timeout(Duration::from_secs(HANDSHAKE_TIMEOUT_SECS), read.next())
            .await
            .context("等待主控端握手响应超时")?
    else {
        anyhow::bail!("Server 在接受 Agent 前关闭了连接");
    };
    match serde_json::from_str::<ServerMessage>(&first)? {
        ServerMessage::AgentAccepted { agent_id, secret } => {
            cfg.agent_id = Some(agent_id);
            if let Some(id) = cfg.agent_id.as_deref() {
                // NOTE(review): mutating the process environment while other
                // threads may read it is racy — confirm this is still needed.
                std::env::set_var("LIGHTOPS_AGENT_ID", id);
            }
            if let Some(secret) = secret {
                // Once a secret is issued the token is dropped from the config.
                cfg.secret = Some(secret);
                cfg.token = None;
            }
            if let Some(path) = cfg.config_path.as_deref() {
                cfg.save(path)?;
            }
        }
        ServerMessage::ErrorMessage { message, .. } => anyhow::bail!(message),
        _ => anyhow::bail!("Server 首条消息不符合预期"),
    }

    // Single writer task: everything queued on `tx` is serialized and sent.
    let writer = tokio::spawn(async move {
        while let Some(msg) = rx.recv().await {
            let Ok(text) = serde_json::to_string(&msg) else {
                continue;
            };
            if write.send(Message::Text(text)).await.is_err() {
                break;
            }
        }
    });

    let heartbeat_tx = tx.clone();
    let heartbeat_id = cfg.agent_id.clone().unwrap_or_default();
    let heartbeat_interval = cfg.heartbeat_interval.unwrap_or(30).max(10);
    let heartbeat = tokio::spawn(async move {
        loop {
            let metrics = system_info::collect_metrics();
            let sent = heartbeat_tx.send(AgentMessage::AgentHeartbeat {
                agent_id: heartbeat_id.clone(),
                metrics: Some(metrics),
            });
            // The receiver is gone once the writer has stopped; stop too
            // instead of collecting metrics forever.
            if sent.is_err() {
                break;
            }
            tokio::time::sleep(Duration::from_secs(heartbeat_interval)).await;
        }
    });

    // The read loop lives in its own future so that every exit path, errors
    // included, falls through to the task cleanup below. Previously the error
    // paths returned early and leaked both background tasks per reconnect.
    let outcome = async {
        loop {
            let msg =
                tokio::time::timeout(Duration::from_secs(READ_GRACE_SECS), read.next()).await;
            match msg {
                Ok(Some(Ok(Message::Text(text)))) => {
                    let server_msg = serde_json::from_str::<ServerMessage>(&text)?;
                    handle_server_message(server_msg, tx.clone(), streams.clone(), &cfg).await;
                }
                Ok(Some(Ok(Message::Close(_)))) | Ok(None) => {
                    return Ok::<(), anyhow::Error>(());
                }
                Ok(Some(Ok(_))) => {}
                Ok(Some(Err(err))) => return Err(err).context("读取主控端消息失败"),
                Err(_) => anyhow::bail!("主控端连接静默超时"),
            }
        }
    }
    .await;

    heartbeat.abort();
    writer.abort();
    outcome.map(|()| cfg)
}
/// Returns `base_secs` plus a jitter of 0–999 ms taken from the sub-second
/// part of the current wall-clock time (0 if the clock is before the epoch).
/// All arithmetic saturates.
fn reconnect_delay(base_secs: u64) -> Duration {
    let jitter_ms = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => u64::from(elapsed.subsec_millis() % 1000),
        Err(_) => 0,
    };
    let base_ms = base_secs.saturating_mul(1000);
    Duration::from_millis(base_ms.saturating_add(jitter_ms))
}
/// Dispatches one message received from the server.
///
/// * `msg` – the decoded server message.
/// * `tx` – queue of outgoing agent messages.
/// * `streams` – open terminal streams, keyed by stream id.
/// * `cfg` – current configuration; only `agent_id` is read here.
///
/// Failures to queue a reply (`tx.send`) are ignored throughout.
async fn handle_server_message(
msg: ServerMessage,
tx: mpsc::UnboundedSender<AgentMessage>,
streams: Arc<DashMap<String, terminal::TerminalHandle>>,
cfg: &AgentConfig,
) {
match msg {
// Echo the server's timestamp back in a pong.
ServerMessage::ServerPing { timestamp } => {
let agent_id = cfg.agent_id.clone().unwrap_or_default();
let _ = tx.send(AgentMessage::AgentPong {
agent_id,
timestamp,
});
}
ServerMessage::TaskRequest {
task_id,
action,
params,
} => {
// Each task runs in its own spawned task so this function returns
// without waiting for the action to finish.
tokio::spawn(async move {
let _ = tx.send(AgentMessage::TaskEvent {
task_id: task_id.clone(),
level: "info".into(),
message: format!("开始执行 {action}"),
data: serde_json::json!({ "action": action }),
});
let result = actions::handle(&action, params).await;
let response = match result {
Ok(data) => {
// Output events and the completion event are queued before the
// final response.
emit_task_output_events(&tx, &task_id, &data);
let _ = tx.send(AgentMessage::TaskEvent {
task_id: task_id.clone(),
level: "info".into(),
message: "任务执行完成".into(),
data: serde_json::json!({ "success": true }),
});
AgentMessage::TaskResponse {
task_id,
success: true,
data,
error: None,
}
}
Err(err) => {
let error = err.to_string();
let _ = tx.send(AgentMessage::TaskEvent {
task_id: task_id.clone(),
level: "error".into(),
message: "任务执行失败".into(),
data: serde_json::json!({ "error": error }),
});
AgentMessage::TaskResponse {
task_id,
success: false,
data: serde_json::json!({}),
error: Some(error),
}
}
};
let _ = tx.send(response);
});
}
ServerMessage::StreamOpen {
stream_id,
kind,
meta,
} => {
// Only "terminal" and "docker.exec" are handled; any other kind is
// dropped without a reply.
if kind == "terminal" || kind == "docker.exec" {
let result = if kind == "docker.exec" {
terminal::open_docker_exec(stream_id.clone(), tx.clone(), meta)
} else {
terminal::open(stream_id.clone(), tx.clone(), meta)
};
match result {
Ok(handle) => {
streams.insert(stream_id, handle);
}
Err(err) => {
// Report the failure by closing the stream with the error text.
let _ = tx.send(AgentMessage::StreamClose {
stream_id,
reason: Some(err.to_string()),
});
}
}
}
}
ServerMessage::StreamData {
stream_id,
data,
binary,
} => {
// Data for an unknown stream is ignored, as are write errors.
if let Some(handle) = streams.get(&stream_id) {
let _ = handle.write(data, binary);
}
}
ServerMessage::StreamClose { stream_id, .. } => {
// Removing the entry drops the terminal handle.
streams.remove(&stream_id);
}
// Acceptance is processed during the handshake in `run_once`; nothing to do here.
ServerMessage::AgentAccepted { .. } => {}
ServerMessage::ErrorMessage { code, message } => {
tracing::warn!(%code, %message, "主控端返回连接错误");
}
}
}
/// Queues one task event per non-empty textual output field of a task result.
///
/// The keys `stdout`, `stderr`, `pull_stdout` and `pull_stderr` are examined
/// in that order. Values that are not strings or are blank after trimming are
/// skipped. Keys containing `stderr` are reported at level `warn`, the others
/// at `info`. Send failures are ignored.
fn emit_task_output_events(
    tx: &mpsc::UnboundedSender<AgentMessage>,
    task_id: &str,
    data: &serde_json::Value,
) {
    const OUTPUT_KEYS: [&str; 4] = ["stdout", "stderr", "pull_stdout", "pull_stderr"];

    let outputs = OUTPUT_KEYS.iter().filter_map(|key| {
        let text = data.get(*key)?.as_str()?.trim();
        (!text.is_empty()).then(|| (*key, text))
    });

    for (key, text) in outputs {
        let level = match key.contains("stderr") {
            true => "warn",
            false => "info",
        };
        let event = AgentMessage::TaskEvent {
            task_id: task_id.to_string(),
            level: level.into(),
            message: key.to_string(),
            data: serde_json::json!({ "output": truncate_event_text(text) }),
        };
        let _ = tx.send(event);
    }
}
/// Limits event text to at most 16 KiB, appending a truncation marker when
/// anything was cut.
///
/// The cut position is moved back to the nearest UTF-8 character boundary so
/// that slicing can never panic on multi-byte output.
fn truncate_event_text(text: &str) -> String {
    const MAX_EVENT_TEXT: usize = 16 * 1024;
    if text.len() <= MAX_EVENT_TEXT {
        return text.to_string();
    }
    // Byte MAX_EVENT_TEXT may fall inside a multi-byte character; index 0 is
    // always a boundary, so this loop terminates.
    let mut cut = MAX_EVENT_TEXT;
    while !text.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}...(输出过长,已截断)", &text[..cut])
}
/// Returns the host name from the `HOSTNAME` environment variable, falling
/// back to `COMPUTERNAME` and finally to the literal `lightops-node`.
fn hostname() -> String {
    for key in ["HOSTNAME", "COMPUTERNAME"] {
        if let Ok(value) = std::env::var(key) {
            return value;
        }
    }
    "lightops-node".to_string()
}

View File

@@ -0,0 +1,44 @@
use lightops_common::protocol::{NetworkInfo, SystemMetrics};
use std::collections::HashMap;
use sysinfo::{Disks, Networks, System};
pub fn collect_metrics() -> SystemMetrics {
let mut system = System::new_all();
system.refresh_all();
let cpu_usage = if system.cpus().is_empty() {
0.0
} else {
system
.cpus()
.iter()
.map(|c| c.cpu_usage() as f64)
.sum::<f64>()
/ system.cpus().len() as f64
};
let disks = Disks::new_with_refreshed_list();
let disk_total = disks.iter().map(|d| d.total_space()).sum();
let disk_available: u64 = disks.iter().map(|d| d.available_space()).sum();
let networks = Networks::new_with_refreshed_list()
.iter()
.map(|(name, data)| {
(
name.to_string(),
NetworkInfo {
received: data.total_received(),
transmitted: data.total_transmitted(),
},
)
})
.collect::<HashMap<_, _>>();
let load = System::load_average();
SystemMetrics {
cpu_usage,
memory_total: system.total_memory(),
memory_used: system.used_memory(),
disk_total,
disk_used: disk_total.saturating_sub(disk_available),
load_avg: load.one,
uptime: System::uptime(),
networks,
}
}

View File

@@ -0,0 +1,122 @@
use anyhow::Result;
use base64::Engine;
use lightops_common::protocol::AgentMessage;
use portable_pty::{native_pty_system, CommandBuilder, PtySize};
use serde_json::Value;
use std::{
io::{Read, Write},
sync::{Arc, Mutex},
thread,
};
use tokio::sync::mpsc;
/// Handle to an open terminal stream; holds the shared, mutex-guarded writer
/// used to send input to the PTY.
pub struct TerminalHandle {
// Writer obtained from the PTY master in `open_with_command`.
writer: Arc<Mutex<Box<dyn Write + Send>>>,
}
impl TerminalHandle {
    /// Writes input to the terminal.
    ///
    /// When `binary` is true, `data` is decoded from standard base64 first;
    /// otherwise its UTF-8 bytes are written as they are.
    ///
    /// # Errors
    /// Fails on invalid base64 or when the write fails.
    ///
    /// # Panics
    /// Panics if the writer mutex is poisoned.
    pub fn write(&self, data: String, binary: bool) -> Result<()> {
        let payload: Vec<u8> = match binary {
            true => base64::engine::general_purpose::STANDARD.decode(data)?,
            false => data.into_bytes(),
        };
        let mut pty = self.writer.lock().expect("pty writer");
        pty.write_all(&payload)?;
        Ok(())
    }
}
/// Opens an interactive shell on a new PTY.
///
/// `meta` may carry `cols` and `rows` (defaults 100 and 30). The shell is
/// taken from the `SHELL` environment variable, falling back to `/bin/sh`.
///
/// # Errors
/// Fails when the PTY cannot be opened or the shell cannot be started.
pub fn open(
    stream_id: String,
    tx: mpsc::UnboundedSender<AgentMessage>,
    meta: Value,
) -> Result<TerminalHandle> {
    let dimension =
        |key: &str, fallback: u64| meta.get(key).and_then(Value::as_u64).unwrap_or(fallback) as u16;
    let size = PtySize {
        rows: dimension("rows", 30),
        cols: dimension("cols", 100),
        pixel_width: 0,
        pixel_height: 0,
    };
    let pair = native_pty_system().openpty(size)?;

    let shell = match std::env::var("SHELL") {
        Ok(configured) => configured,
        Err(_) => String::from("/bin/sh"),
    };
    let mut cmd = CommandBuilder::new(shell);
    open_with_command(stream_id, tx, pair, &mut cmd)
}
/// Opens `docker exec -it <container> <shell>` on a new PTY.
///
/// `meta` must carry `container_id`; it may carry `cols`/`rows` (defaults
/// 100 and 30) and `shell`. Only `bash` is honoured as a shell choice;
/// anything else runs `sh`.
///
/// # Errors
/// Fails when `container_id` is missing or rejected by `validate_docker_id`,
/// when the PTY cannot be opened, or when the command cannot be started.
pub fn open_docker_exec(
    stream_id: String,
    tx: mpsc::UnboundedSender<AgentMessage>,
    meta: Value,
) -> Result<TerminalHandle> {
    let dimension =
        |key: &str, fallback: u64| meta.get(key).and_then(Value::as_u64).unwrap_or(fallback) as u16;
    let cols = dimension("cols", 100);
    let rows = dimension("rows", 30);

    // The container id is checked before any PTY is allocated.
    let Some(container_id) = meta.get("container_id").and_then(Value::as_str) else {
        return Err(anyhow::anyhow!("缺少容器 ID"));
    };
    validate_docker_id(container_id)?;

    let shell = match meta.get("shell").and_then(Value::as_str) {
        Some("bash") => "bash",
        _ => "sh",
    };

    let pair = native_pty_system().openpty(PtySize {
        rows,
        cols,
        pixel_width: 0,
        pixel_height: 0,
    })?;

    let mut cmd = CommandBuilder::new("docker");
    for arg in ["exec", "-it", container_id, shell] {
        cmd.arg(arg);
    }
    open_with_command(stream_id, tx, pair, &mut cmd)
}
/// Starts `cmd` on the slave side of `pair` and forwards its output.
///
/// A dedicated thread reads the PTY master and queues the output as
/// `StreamData` messages on `tx`. When the read side ends, the child is
/// killed and reaped and a `StreamClose` is queued. The returned handle owns
/// the writer used for terminal input.
///
/// # Errors
/// Fails when the command cannot be spawned or the master's reader/writer
/// cannot be obtained.
fn open_with_command(
    stream_id: String,
    tx: mpsc::UnboundedSender<AgentMessage>,
    pair: portable_pty::PtyPair,
    cmd: &mut CommandBuilder,
) -> Result<TerminalHandle> {
    cmd.env("TERM", "xterm-256color");
    // NOTE(review): portable-pty documents `spawn_command` as taking the
    // `CommandBuilder` by value — confirm this call against the pinned version.
    let mut child = pair.slave.spawn_command(cmd)?;
    let mut reader = pair.master.try_clone_reader()?;
    let writer = Arc::new(Mutex::new(pair.master.take_writer()?));
    let close_id = stream_id.clone();
    thread::spawn(move || {
        let mut buf = [0u8; 8192];
        // Bytes of a multi-byte character cut off at the end of a read are
        // held here until the rest arrives, so the character is not decoded
        // as replacement characters.
        let mut pending: Vec<u8> = Vec::new();
        loop {
            let n = match reader.read(&mut buf) {
                Ok(0) | Err(_) => break,
                Ok(n) => n,
            };
            pending.extend_from_slice(&buf[..n]);
            let ready = utf8_complete_len(&pending);
            if ready == 0 {
                continue;
            }
            let data = String::from_utf8_lossy(&pending[..ready]).into_owned();
            pending.drain(..ready);
            let _ = tx.send(AgentMessage::StreamData {
                stream_id: stream_id.clone(),
                data,
                binary: false,
            });
        }
        // Flush whatever is left; an incomplete tail is decoded lossily.
        if !pending.is_empty() {
            let _ = tx.send(AgentMessage::StreamData {
                stream_id: stream_id.clone(),
                data: String::from_utf8_lossy(&pending).into_owned(),
                binary: false,
            });
        }
        let _ = child.kill();
        // Reap the child so it does not linger as a zombie process.
        let _ = child.wait();
        let _ = tx.send(AgentMessage::StreamClose {
            stream_id: close_id,
            reason: Some("终端已关闭".into()),
        });
    });
    Ok(TerminalHandle { writer })
}

/// Returns the length of the longest prefix of `bytes` that does not end in
/// the middle of a multi-byte UTF-8 sequence. At most three bytes are held back.
fn utf8_complete_len(bytes: &[u8]) -> usize {
    let len = bytes.len();
    for back in 1..=len.min(3) {
        let byte = bytes[len - back];
        if byte & 0b1100_0000 == 0b1000_0000 {
            // Continuation byte: keep looking for its lead byte.
            continue;
        }
        let needed = if byte >= 0xF0 {
            4
        } else if byte >= 0xE0 {
            3
        } else if byte >= 0xC0 {
            2
        } else {
            1
        };
        return if needed > back { len - back } else { len };
    }
    len
}
/// Checks that `id` is safe to pass to the docker command line as a
/// container identifier.
///
/// Accepts 1–200 characters drawn from ASCII letters, digits and `.:/@_-`.
/// The value must not start with `-`, because docker would parse it as a
/// command-line option instead of a container reference.
///
/// # Errors
/// Returns an error when any of these rules is violated.
fn validate_docker_id(id: &str) -> Result<()> {
    let well_formed = !id.is_empty()
        && id.len() <= 200
        && !id.starts_with('-')
        && id
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || ".:/@_-".contains(c));
    if !well_formed {
        anyhow::bail!("Docker 容器标识无效");
    }
    Ok(())
}

View File

@@ -0,0 +1,10 @@
[package]
name = "lightops-cli"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
anyhow.workspace = true
clap.workspace = true

View File

@@ -0,0 +1,14 @@
use anyhow::Result;
use clap::Parser;
// Command-line options of the CLI. Plain `//` comments are used on purpose:
// clap's derive turns `///` doc comments into help text.
#[derive(Debug, Parser)]
struct Args {
// `--version` flag; `main` parses it but never reads the value.
#[arg(long)]
version: bool,
}
/// Parses the command line (so `--help` and invalid arguments are handled by
/// clap) and prints the package version.
fn main() -> Result<()> {
    let _parsed = Args::parse();
    let version = env!("CARGO_PKG_VERSION");
    println!("lightops-cli {}", version);
    Ok(())
}

View File

@@ -0,0 +1,12 @@
[package]
name = "lightops-common"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
serde.workspace = true
serde_json.workspace = true
uuid.workspace = true
chrono.workspace = true

View File

@@ -0,0 +1,48 @@
use serde::{Deserialize, Serialize};
/// Serializable response envelope with a success flag, an optional payload
/// and an optional error message.
#[derive(Debug, Serialize)]
pub struct ApiResponse<T>
where
T: Serialize,
{
/// `true` for responses built by `ok`/`empty`, `false` for `err`.
pub success: bool,
/// Payload; `None` for error responses.
pub data: Option<T>,
/// Error message; `None` for successful responses.
pub error: Option<String>,
}
impl<T> ApiResponse<T>
where
    T: Serialize,
{
    /// Wraps `data` in a successful response with no error message.
    pub fn ok(data: T) -> Self {
        let data = Some(data);
        Self {
            data,
            success: true,
            error: None,
        }
    }
}
impl ApiResponse<()> {
    /// Successful response whose payload is the unit value.
    pub fn empty() -> Self {
        let data = Some(());
        Self {
            data,
            success: true,
            error: None,
        }
    }

    /// Failed response carrying `message` and no payload.
    pub fn err(message: impl Into<String>) -> Self {
        let error = Some(message.into());
        Self {
            error,
            success: false,
            data: None,
        }
    }
}
/// Optional pagination parameters that can be deserialized from a request.
#[derive(Debug, Deserialize)]
pub struct PageQuery {
/// Requested `limit`, if supplied.
pub limit: Option<i64>,
/// Requested `offset`, if supplied.
pub offset: Option<i64>,
}

View File

@@ -0,0 +1,2 @@
pub mod api;
pub mod protocol;

View File

@@ -0,0 +1,280 @@
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
/// Feature flags an agent reports in its hello message; every flag defaults
/// to `true`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentCapabilities {
/// `file` capability flag.
pub file: bool,
/// `terminal` capability flag.
pub terminal: bool,
/// `systemd` capability flag.
pub systemd: bool,
/// `nginx` capability flag.
pub nginx: bool,
/// `docker` capability flag.
pub docker: bool,
/// `logs` capability flag.
pub logs: bool,
}
impl Default for AgentCapabilities {
    /// Every capability is advertised as enabled.
    fn default() -> Self {
        let enabled = true;
        Self {
            file: enabled,
            terminal: enabled,
            systemd: enabled,
            nginx: enabled,
            docker: enabled,
            logs: enabled,
        }
    }
}
/// Host metrics snapshot attached to agent heartbeats.
/// NOTE(review): units are those of the collector that fills this struct — confirm there.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemMetrics {
/// CPU usage figure.
pub cpu_usage: f64,
/// Total memory.
pub memory_total: u64,
/// Used memory.
pub memory_used: u64,
/// Total disk space.
pub disk_total: u64,
/// Used disk space.
pub disk_used: u64,
/// Load average figure.
pub load_avg: f64,
/// System uptime.
pub uptime: u64,
/// Per-interface traffic counters, keyed by a string (presumably the interface name).
pub networks: HashMap<String, NetworkInfo>,
}
/// Traffic counters for one network interface.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NetworkInfo {
/// Received counter.
pub received: u64,
/// Transmitted counter.
pub transmitted: u64,
}
/// Messages sent by an agent. Serialized as objects internally tagged by a
/// `type` field whose value is the `rename` string of the variant.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum AgentMessage {
/// Hello message carrying the agent's identity fields, host description and capabilities.
#[serde(rename = "agent.hello")]
AgentHello {
agent_id: Option<String>,
token: Option<String>,
secret: Option<String>,
hostname: String,
os: String,
arch: String,
version: String,
capabilities: AgentCapabilities,
},
/// Heartbeat, optionally carrying a metrics snapshot.
#[serde(rename = "agent.heartbeat")]
AgentHeartbeat {
agent_id: String,
metrics: Option<SystemMetrics>,
},
/// Pong carrying an agent id and a timestamp.
#[serde(rename = "agent.pong")]
AgentPong { agent_id: String, timestamp: i64 },
/// Result of a task: success flag, result data and optional error text.
#[serde(rename = "task.response")]
TaskResponse {
task_id: String,
success: bool,
data: Value,
error: Option<String>,
},
/// Event attached to a task: level, message and arbitrary data.
#[serde(rename = "task.event")]
TaskEvent {
task_id: String,
level: String,
message: String,
data: Value,
},
/// Stream open message.
#[serde(rename = "stream.open")]
StreamOpen {
stream_id: String,
kind: String,
meta: Value,
},
/// Stream payload; `binary` presumably marks `data` as base64 — confirm with both peers.
#[serde(rename = "stream.data")]
StreamData {
stream_id: String,
data: String,
binary: bool,
},
/// Stream close message with an optional reason.
#[serde(rename = "stream.close")]
StreamClose {
stream_id: String,
reason: Option<String>,
},
/// Error report with a code and a message.
#[serde(rename = "error")]
ErrorMessage { code: String, message: String },
}
/// Messages sent by the server. Serialized as objects internally tagged by a
/// `type` field whose value is the `rename` string of the variant.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ServerMessage {
/// Acceptance message carrying the agent id and, optionally, a secret.
#[serde(rename = "agent.accepted")]
AgentAccepted {
agent_id: String,
secret: Option<String>,
},
/// Task request: a task id, an action name and its parameters.
#[serde(rename = "task.request")]
TaskRequest {
task_id: String,
action: String,
params: Value,
},
/// Ping carrying a timestamp.
#[serde(rename = "server.ping")]
ServerPing { timestamp: i64 },
/// Stream open message with a kind and kind-specific metadata.
#[serde(rename = "stream.open")]
StreamOpen {
stream_id: String,
kind: String,
meta: Value,
},
/// Stream payload; `binary` presumably marks `data` as base64 — confirm with both peers.
#[serde(rename = "stream.data")]
StreamData {
stream_id: String,
data: String,
binary: bool,
},
/// Stream close message with an optional reason.
#[serde(rename = "stream.close")]
StreamClose {
stream_id: String,
reason: Option<String>,
},
/// Error report with a code and a message.
#[serde(rename = "error")]
ErrorMessage { code: String, message: String },
}
/// Serializable description of one file-system entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntry {
pub name: String,
pub path: String,
/// Whether the entry is a directory.
pub is_dir: bool,
pub size: u64,
/// Modification time as text, when present (format not fixed by this type).
pub modified: Option<String>,
pub readonly: bool,
}
/// Serializable description of one service.
/// NOTE(review): `load`/`active`/`sub` look like systemd unit state columns — confirm with the producer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServiceInfo {
pub name: String,
pub load: String,
pub active: String,
pub sub: String,
pub description: String,
/// Enabled state, when present.
pub enabled: Option<bool>,
}
/// Serializable description of one Docker container; every field is plain text.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DockerContainer {
pub id: String,
pub image: String,
pub command: String,
pub status: String,
pub names: String,
pub ports: String,
}
/// Serializable description of one Docker image; every field, including
/// `size`, is plain text.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DockerImage {
pub repository: String,
pub tag: String,
pub id: String,
pub size: String,
}
/// Serializable description of one nginx site: its name, enabled flag and path.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NginxSite {
pub name: String,
pub enabled: bool,
pub path: String,
}
/// Category of an [`Application`]. No serde rename is applied, so variants
/// are serialized under their Rust names.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum ApplicationType {
WebApp,
Service,
Database,
Runtime,
Tool,
Container,
ComposeProject,
StaticSite,
ReverseProxy,
Custom,
}
/// Provider of an [`Application`]. No serde rename is applied, so variants
/// are serialized under their Rust names.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum ApplicationProviderType {
Systemd,
Docker,
DockerCompose,
Apt,
Dnf,
Pacman,
Snap,
Flatpak,
Binary,
PM2,
Supervisor,
NginxSite,
LightOpsManaged,
Custom,
}
/// Status of an [`Application`]. No serde rename is applied, so variants are
/// serialized under their Rust names.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum ApplicationStatus {
Running,
Stopped,
Failed,
Enabled,
Disabled,
Installing,
Updating,
Unknown,
}
/// Serializable record describing one application associated with an agent.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Application {
pub id: String,
/// Id of the agent this record belongs to.
pub agent_id: String,
pub name: String,
pub display_name: String,
pub description: Option<String>,
/// Category of the application.
pub app_type: ApplicationType,
/// Provider of the application.
pub provider: ApplicationProviderType,
pub status: ApplicationStatus,
pub version: Option<String>,
pub install_path: Option<String>,
pub work_dir: Option<String>,
pub config_paths: Vec<String>,
pub log_paths: Vec<String>,
pub data_paths: Vec<String>,
pub ports: Vec<u16>,
pub domains: Vec<String>,
// The following optional names link the record to provider-specific objects.
pub service_name: Option<String>,
pub container_id: Option<String>,
pub compose_project: Option<String>,
pub package_name: Option<String>,
pub nginx_site: Option<String>,
pub run_user: Option<String>,
pub is_system: bool,
pub is_managed: bool,
pub is_lightops_managed: bool,
/// Free-form JSON metadata.
pub metadata: Value,
/// Creation time as text, when present (format not fixed by this type).
pub created_at: Option<String>,
/// Last update time as text, when present (format not fixed by this type).
pub updated_at: Option<String>,
}
/// Serializable record linking an application (`app_id`) to an optional
/// target, with a textual relation type and free-form metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApplicationRelation {
pub id: String,
pub agent_id: String,
pub app_id: String,
pub relation_type: String,
pub target_id: Option<String>,
pub target_name: Option<String>,
pub metadata: Value,
}
/// Serializable detail view that bundles an [`Application`] with its
/// relations and additional JSON-valued information.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApplicationDetail {
pub application: Application,
pub relations: Vec<ApplicationRelation>,
/// Recent actions as free-form JSON values.
pub recent_actions: Vec<Value>,
pub runtime_info: Value,
/// Names of the actions available for this application.
pub available_actions: Vec<String>,
pub risk_level: String,
pub provider_specific_info: Value,
}

View File

@@ -0,0 +1,30 @@
[package]
name = "lightops-server"
version.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
anyhow.workspace = true
argon2.workspace = true
async-trait.workspace = true
axum.workspace = true
base64.workspace = true
chrono.workspace = true
clap.workspace = true
dashmap.workspace = true
futures-util.workspace = true
jsonwebtoken.workspace = true
lightops-common = { path = "../lightops-common" }
rand.workspace = true
serde.workspace = true
serde_json.workspace = true
sha2.workspace = true
sqlx.workspace = true
thiserror.workspace = true
tokio.workspace = true
toml.workspace = true
tower-http.workspace = true
tracing.workspace = true
tracing-subscriber.workspace = true
uuid.workspace = true

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,146 @@
use crate::{error::AppError, state::AppState};
use argon2::{
password_hash::{PasswordHash, PasswordHasher, PasswordVerifier, SaltString},
Argon2,
};
use async_trait::async_trait;
use axum::{
extract::FromRequestParts,
http::{header::AUTHORIZATION, request::Parts},
};
use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine};
use chrono::{Duration, Utc};
use jsonwebtoken::{decode, encode, DecodingKey, EncodingKey, Header, Validation};
use rand::{rngs::OsRng, RngCore};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
/// Authenticated user attached to a request by the `FromRequestParts`
/// extractor.
#[derive(Debug, Clone)]
pub struct AuthUser {
/// User id, taken from the token's `sub` claim.
pub id: i64,
/// User name, taken from the token.
pub username: String,
/// Role name, taken from the token; `"admin"` is allowed everything by `can`.
pub role: String,
/// Granted permission strings; `["*"]` for admins, otherwise loaded from the database.
pub permissions: Vec<String>,
}
/// JWT payload produced by `make_jwt` and read back by the `AuthUser`
/// extractor.
#[derive(Debug, Serialize, Deserialize)]
struct Claims {
/// User id.
sub: i64,
/// User name at the time the token was issued.
username: String,
/// Role at the time the token was issued.
role: String,
/// Expiry as a Unix timestamp in seconds (set to 12 hours after issue by `make_jwt`).
exp: usize,
}
#[async_trait]
impl FromRequestParts<AppState> for AuthUser {
    type Rejection = AppError;

    /// Authenticates a request from its JWT.
    ///
    /// The token is taken from an `Authorization: Bearer …` header, or failing
    /// that from the first `token=` pair of the query string. A missing or
    /// undecodable token is rejected with `AppError::Unauthorized`; permissions
    /// are then loaded for the user named in the claims.
    async fn from_request_parts(
        parts: &mut Parts,
        state: &AppState,
    ) -> Result<Self, Self::Rejection> {
        let bearer = parts
            .headers
            .get(AUTHORIZATION)
            .and_then(|value| value.to_str().ok())
            .and_then(|value| value.strip_prefix("Bearer "))
            .map(str::to_owned);

        let token = match bearer {
            Some(token) => token,
            None => parts
                .uri
                .query()
                .and_then(|query| {
                    query
                        .split('&')
                        .filter_map(|pair| pair.split_once('='))
                        .find(|(key, _)| *key == "token")
                        .map(|(_, value)| value.to_owned())
                })
                .ok_or(AppError::Unauthorized)?,
        };

        let key = DecodingKey::from_secret(state.cfg.jwt_secret.as_bytes());
        let claims = match decode::<Claims>(&token, &key, &Validation::default()) {
            Ok(data) => data.claims,
            Err(_) => return Err(AppError::Unauthorized),
        };

        let permissions = load_permissions(state, claims.sub, &claims.role).await?;
        Ok(AuthUser {
            id: claims.sub,
            username: claims.username,
            role: claims.role,
            permissions,
        })
    }
}
/// Loads the permission strings granted to a user.
///
/// Admins get the single wildcard permission `*` without touching the
/// database; for everyone else the rows of `user_permissions` are returned
/// in sorted order.
///
/// # Errors
/// A database failure is converted through `From<sqlx::Error> for AppError`,
/// which logs the cause and yields `AppError::Internal`. It was previously
/// reported as `Unauthorized`, which hid the failure and made a database
/// error look like an expired login.
async fn load_permissions(
    state: &AppState,
    user_id: i64,
    role: &str,
) -> Result<Vec<String>, AppError> {
    if role == "admin" {
        return Ok(vec!["*".into()]);
    }
    let rows = sqlx::query_scalar::<_, String>(
        "SELECT permission FROM user_permissions WHERE user_id = ? ORDER BY permission",
    )
    .bind(user_id)
    .fetch_all(&state.pool)
    .await?;
    Ok(rows)
}
impl AuthUser {
    /// Returns whether this user may perform `permission`.
    ///
    /// Admins may do everything. Otherwise a granted entry matches when it is
    /// `*`, equals `permission`, is a dotted ancestor of it (`files` grants
    /// `files.read`), or ends in `*` and its remainder is a prefix of
    /// `permission` (`docker.*` grants `docker.exec`).
    pub fn can(&self, permission: &str) -> bool {
        if self.role == "admin" {
            return true;
        }
        self.permissions.iter().any(|granted| {
            if granted == "*" || granted == permission {
                return true;
            }
            let stem = granted.trim_end_matches('*');
            let dotted = format!("{}.", stem);
            if permission.starts_with(dotted.as_str()) {
                return true;
            }
            granted.ends_with('*') && permission.starts_with(stem)
        })
    }
}
/// Hashes `password` with Argon2 (default parameters) and a freshly generated
/// random salt, returning the encoded hash string.
///
/// # Errors
/// Returns an error when hashing fails.
pub fn hash_password(password: &str) -> anyhow::Result<String> {
    let salt = SaltString::generate(&mut OsRng);
    let hashed = Argon2::default().hash_password(password.as_bytes(), &salt);
    match hashed {
        Ok(hash) => Ok(hash.to_string()),
        Err(err) => Err(anyhow::anyhow!("密码哈希失败: {err}")),
    }
}
/// Checks `password` against an encoded Argon2 hash. Returns `false` both for
/// a mismatch and for a hash string that cannot be parsed.
pub fn verify_password(password: &str, hash: &str) -> bool {
    match PasswordHash::new(hash) {
        Ok(parsed) => Argon2::default()
            .verify_password(password.as_bytes(), &parsed)
            .is_ok(),
        Err(_) => false,
    }
}
/// Issues a JWT for `user`, signed with `secret` and valid for 12 hours.
///
/// # Errors
/// Returns an error when encoding fails.
pub fn make_jwt(user: &AuthUser, secret: &str) -> anyhow::Result<String> {
    let expires_at = Utc::now() + Duration::hours(12);
    let claims = Claims {
        sub: user.id,
        username: user.username.clone(),
        role: user.role.clone(),
        exp: expires_at.timestamp() as usize,
    };
    let key = EncodingKey::from_secret(secret.as_bytes());
    let token = encode(&Header::default(), &claims, &key)?;
    Ok(token)
}
/// Generates 32 random bytes from the OS random source and encodes them as
/// unpadded URL-safe base64.
pub fn random_token() -> String {
    let mut raw = [0u8; 32];
    OsRng.fill_bytes(&mut raw);
    let encoded = URL_SAFE_NO_PAD.encode(raw);
    encoded
}
/// Returns the SHA-256 digest of `token`, encoded as unpadded URL-safe base64.
pub fn token_hash(token: &str) -> String {
    let digest = Sha256::digest(token.as_bytes());
    URL_SAFE_NO_PAD.encode(digest)
}

View File

@@ -0,0 +1,38 @@
use anyhow::Result;
use serde::Deserialize;
use std::{fs, path::Path};
/// Server settings deserialized from a TOML file. No field has a serde
/// default, so a config file must provide all of them.
#[derive(Debug, Clone, Deserialize)]
pub struct ServerConfig {
/// Socket address the HTTP server binds to.
pub bind: String,
/// Database connection URL.
pub database_url: String,
/// Secret used to sign and verify JWTs.
pub jwt_secret: String,
/// Public URL setting; no use is visible in this chunk.
pub public_url: String,
/// Directory served for static files.
pub static_dir: String,
/// Registration token lifetime (per the name, minutes); no use is visible in this chunk.
pub registration_token_ttl_minutes: i64,
/// Task timeout (per the name, seconds); no use is visible in this chunk.
pub task_timeout_secs: u64,
}
impl Default for ServerConfig {
    /// Settings used when no config file exists: listen on all interfaces at
    /// port 8080 with a local SQLite database and a placeholder JWT secret.
    fn default() -> Self {
        let bind = String::from("0.0.0.0:8080");
        let database_url = String::from("sqlite://lightops.db?mode=rwc");
        let jwt_secret = String::from("change-me-in-production");
        let public_url = String::from("http://127.0.0.1:8080");
        let static_dir = String::from("web/dist");
        Self {
            bind,
            database_url,
            jwt_secret,
            public_url,
            static_dir,
            registration_token_ttl_minutes: 30,
            task_timeout_secs: 20,
        }
    }
}
impl ServerConfig {
pub fn load(path: &str) -> Result<Self> {
if Path::new(path).exists() {
Ok(toml::from_str(&fs::read_to_string(path)?)?)
} else {
Ok(Self::default())
}
}
}

View File

@@ -0,0 +1,7 @@
use anyhow::Result;
use sqlx::SqlitePool;
/// Applies the migrations embedded from `../../migrations` to `pool`.
///
/// # Errors
/// Returns an error when running the migrations fails.
pub async fn migrate(pool: &SqlitePool) -> Result<()> {
    let migrator = sqlx::migrate!("../../migrations");
    migrator.run(pool).await?;
    Ok(())
}

View File

@@ -0,0 +1,51 @@
use axum::{http::StatusCode, response::IntoResponse, Json};
use lightops_common::api::ApiResponse;
use thiserror::Error;
/// Errors returned by request handlers. The `#[error]` text is the message
/// sent to the client; the HTTP status is chosen in `into_response`.
#[derive(Debug, Error)]
pub enum AppError {
/// Mapped to 401.
#[error("未登录或登录已失效")]
Unauthorized,
/// Mapped to 403.
#[error("无权执行此操作")]
Forbidden,
/// Mapped to 404.
#[error("资源不存在")]
NotFound,
/// Mapped to 400; the payload is appended to the message.
#[error("请求参数错误:{0}")]
BadRequest(String),
/// Mapped to 503.
#[error("Agent 不在线")]
AgentOffline,
/// Mapped to 504.
#[error("操作超时")]
Timeout,
/// Mapped to 500; also produced from `sqlx::Error` and `anyhow::Error`.
#[error("服务器内部错误")]
Internal,
}
impl IntoResponse for AppError {
    /// Renders the error as a JSON `ApiResponse::err` body carrying the
    /// display message, with the HTTP status that belongs to the variant.
    fn into_response(self) -> axum::response::Response {
        let message = self.to_string();
        let status = match &self {
            Self::Unauthorized => StatusCode::UNAUTHORIZED,
            Self::Forbidden => StatusCode::FORBIDDEN,
            Self::NotFound => StatusCode::NOT_FOUND,
            Self::BadRequest(_) => StatusCode::BAD_REQUEST,
            Self::AgentOffline => StatusCode::SERVICE_UNAVAILABLE,
            Self::Timeout => StatusCode::GATEWAY_TIMEOUT,
            Self::Internal => StatusCode::INTERNAL_SERVER_ERROR,
        };
        let body = Json(ApiResponse::err(message));
        (status, body).into_response()
    }
}
impl From<sqlx::Error> for AppError {
fn from(err: sqlx::Error) -> Self {
tracing::error!(?err, "database error");
AppError::Internal
}
}
impl From<anyhow::Error> for AppError {
fn from(err: anyhow::Error) -> Self {
tracing::error!(?err, "application error");
AppError::Internal
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,121 @@
mod apps;
mod auth;
mod config;
mod db;
mod error;
mod handlers;
mod maintenance;
mod state;
mod store;
mod task;
use anyhow::Result;
use axum::{http::header, response::IntoResponse, routing::get, Router};
use clap::Parser;
use config::ServerConfig;
use sqlx::sqlite::SqlitePoolOptions;
use std::net::SocketAddr;
use tokio::net::TcpListener;
use tower_http::{
cors::CorsLayer,
services::{ServeDir, ServeFile},
trace::TraceLayer,
};
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
// Command-line options of the server. Plain `//` comments are used on
// purpose: clap's derive turns `///` doc comments into help text.
#[derive(Debug, Parser)]
struct Args {
// Path of the TOML config file passed to `ServerConfig::load`.
#[arg(long, default_value = "config/server.toml")]
config: String,
}
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::registry()
.with(
tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| "info".into()),
)
.with(tracing_subscriber::fmt::layer())
.init();
let args = Args::parse();
let cfg = ServerConfig::load(&args.config)?;
let pool = SqlitePoolOptions::new()
.max_connections(8)
.connect(&cfg.database_url)
.await?;
db::migrate(&pool).await?;
let state = state::AppState::new(pool, cfg.clone());
maintenance::spawn(state.clone());
let app = api_router(state.clone())
.route("/install-agent.sh", get(install_agent_script))
.route("/upgrade-agent.sh", get(upgrade_agent_script))
.route("/uninstall-agent.sh", get(uninstall_agent_script))
.fallback_service(
ServeDir::new(&cfg.static_dir)
.append_index_html_on_directories(true)
.not_found_service(ServeFile::new(format!("{}/index.html", cfg.static_dir))),
)
.layer(CorsLayer::permissive())
.layer(TraceLayer::new_for_http());
let addr: SocketAddr = cfg.bind.parse()?;
let listener = TcpListener::bind(addr).await?;
tracing::info!("LightOps 服务已监听 {}", addr);
axum::serve(
listener,
app.into_make_service_with_connect_info::<SocketAddr>(),
)
.with_graceful_shutdown(shutdown_signal())
.await?;
Ok(())
}
/// Builds the router that mounts all handlers under `/api` and binds them to
/// `state`.
fn api_router(state: state::AppState) -> Router {
    let api = handlers::router();
    Router::new().nest("/api", api).with_state(state)
}
/// Serves the install script embedded at compile time as a shell script.
async fn install_agent_script() -> impl IntoResponse {
    let body = include_str!("../../../scripts/install-agent.sh");
    let headers = [(header::CONTENT_TYPE, "text/x-shellscript; charset=utf-8")];
    (headers, body)
}
/// Serves the upgrade script embedded at compile time as a shell script.
async fn upgrade_agent_script() -> impl IntoResponse {
    let body = include_str!("../../../scripts/upgrade-agent.sh");
    let headers = [(header::CONTENT_TYPE, "text/x-shellscript; charset=utf-8")];
    (headers, body)
}
/// Serves the uninstall script embedded at compile time as a shell script.
async fn uninstall_agent_script() -> impl IntoResponse {
    let body = include_str!("../../../scripts/uninstall-agent.sh");
    let headers = [(header::CONTENT_TYPE, "text/x-shellscript; charset=utf-8")];
    (headers, body)
}
/// Completes when the process should shut down: on Ctrl-C, or on Unix also
/// when SIGTERM is received.
///
/// # Panics
/// On Unix, panics if the SIGTERM handler cannot be installed.
async fn shutdown_signal() {
let ctrl_c = async {
// An error from the Ctrl-C listener is ignored; the future still completes.
let _ = tokio::signal::ctrl_c().await;
};
#[cfg(unix)]
let terminate = async {
let mut signal = tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
.expect("install signal handler");
signal.recv().await;
};
// Platforms without SIGTERM get a future that never resolves, so only Ctrl-C applies.
#[cfg(not(unix))]
let terminate = std::future::pending::<()>();
// Whichever signal arrives first ends the wait.
tokio::select! {
_ = ctrl_c => {},
_ = terminate => {},
}
}

View File

@@ -0,0 +1,70 @@
use crate::state::AppState;
use chrono::{Duration, Utc};
use sqlx::Row;
/// Starts the background maintenance task, which runs `run_once` on a
/// 60-second interval and logs failures without stopping.
pub fn spawn(state: AppState) {
    let period = std::time::Duration::from_secs(60);
    tokio::spawn(async move {
        let mut ticker = tokio::time::interval(period);
        loop {
            ticker.tick().await;
            match run_once(&state).await {
                Ok(()) => {}
                Err(err) => tracing::warn!(?err, "后台维护任务执行失败"),
            }
        }
    });
}
/// Runs one maintenance pass: mark stale agents offline, prune old metrics,
/// then run the scheduled application health checks.
///
/// The steps run in order and the first failure aborts the pass, so later
/// steps are skipped until the next tick.
async fn run_once(state: &AppState) -> anyhow::Result<()> {
mark_stale_agents_offline(state).await?;
prune_old_metrics(state).await?;
crate::apps::run_scheduled_health_checks(state).await?;
Ok(())
}
/// Marks agents offline whose `last_seen_at` is older than the configured
/// threshold (`agent.offline_after_seconds`, default 120, minimum 30).
///
/// For each stale agent the live connection entry is removed, its pending
/// tasks are failed, and its row is set to `offline`.
///
/// # Errors
/// Returns an error when a database query fails.
async fn mark_stale_agents_offline(state: &AppState) -> anyhow::Result<()> {
    let configured = setting_i64(state, "agent.offline_after_seconds", 120).await;
    let offline_after = configured.max(30);
    let cutoff = (Utc::now() - Duration::seconds(offline_after)).to_rfc3339();

    let stale = sqlx::query(
        "SELECT id FROM agents WHERE status = 'online' AND last_seen_at IS NOT NULL AND last_seen_at < ?",
    )
    .bind(&cutoff)
    .fetch_all(&state.pool)
    .await?;

    for row in stale {
        let agent_id: String = row.get("id");
        {
            // Scope the write lock to the removal only.
            let mut agents = state.agents.write().await;
            agents.remove(&agent_id);
        }
        crate::task::fail_agent_pending_tasks(state, &agent_id, "Agent 心跳超时").await;
        let now = Utc::now().to_rfc3339();
        sqlx::query("UPDATE agents SET status = 'offline', updated_at = ? WHERE id = ?")
            .bind(now)
            .bind(&agent_id)
            .execute(&state.pool)
            .await?;
    }
    Ok(())
}
/// Deletes `agent_metrics` rows older than the configured retention window.
///
/// The window comes from the `metrics.retention_days` setting (default 30)
/// and is clamped to at least one day.
///
/// # Errors
/// Returns an error if the delete query fails.
async fn prune_old_metrics(state: &AppState) -> anyhow::Result<()> {
    let configured = setting_i64(state, "metrics.retention_days", 30).await;
    let retention_days = configured.max(1);

    // NOTE(review): SQLite's datetime() yields "YYYY-MM-DD HH:MM:SS"; this
    // comparison assumes `created_at` is stored in that same format rather
    // than RFC 3339 — confirm against the table schema.
    let statement =
        sqlx::query("DELETE FROM agent_metrics WHERE created_at < datetime('now', ?)")
            .bind(format!("-{retention_days} days"));
    statement.execute(&state.pool).await?;

    Ok(())
}
/// Reads an integer setting from the `settings` table.
///
/// Falls back to `default` when the query fails, the key is absent, or the
/// stored value does not parse as an `i64`.
async fn setting_i64(state: &AppState, key: &str, default: i64) -> i64 {
    let stored = sqlx::query_scalar::<_, String>("SELECT value FROM settings WHERE key = ?")
        .bind(key)
        .fetch_optional(&state.pool)
        .await;

    match stored {
        Ok(Some(raw)) => raw.parse::<i64>().unwrap_or(default),
        // Missing row or database error: both fall back to the default.
        _ => default,
    }
}

View File

@@ -0,0 +1,43 @@
use crate::config::ServerConfig;
use dashmap::DashMap;
use lightops_common::protocol::{AgentMessage, ServerMessage};
use serde_json::Value;
use sqlx::SqlitePool;
use std::{collections::HashMap, sync::Arc};
use tokio::sync::{mpsc, oneshot, RwLock};
/// Shared server state handed to handlers and background tasks.
///
/// Every field is a pool handle or an `Arc`, so cloning shares the same
/// underlying data.
#[derive(Clone)]
pub struct AppState {
/// SQLite connection pool used for all persistence.
pub pool: SqlitePool,
/// Server configuration, shared read-only.
pub cfg: Arc<ServerConfig>,
/// Handles of currently connected agents, keyed by agent id.
pub agents: Arc<RwLock<HashMap<String, AgentHandle>>>,
/// Reply channels of in-flight tasks, keyed by task id.
pub pending: Arc<DashMap<String, oneshot::Sender<TaskReply>>>,
/// Maps each in-flight task id to the id of the agent it was sent to.
pub pending_agents: Arc<DashMap<String, String>>,
/// Senders forwarding `AgentMessage`s to stream consumers.
/// NOTE(review): the key is presumably a stream/session id — confirm against the users of this map.
pub streams: Arc<DashMap<String, mpsc::UnboundedSender<AgentMessage>>>,
}
/// Handle to one connected agent, stored in `AppState::agents`.
#[derive(Clone)]
pub struct AgentHandle {
/// Identifier of the connection this handle belongs to.
/// NOTE(review): presumably used to tell a reconnect apart from the old connection — confirm.
pub connection_id: String,
/// Channel for sending `ServerMessage`s to the agent.
pub tx: mpsc::UnboundedSender<ServerMessage>,
}
/// Outcome of a task, delivered to the waiting requester.
#[derive(Debug)]
pub struct TaskReply {
/// Whether the task completed successfully.
pub success: bool,
/// Result payload of the task as JSON.
pub data: Value,
/// Error description, if any.
pub error: Option<String>,
}
impl AppState {
    /// Creates the application state from a database pool and configuration,
    /// with all agent and task registries starting empty.
    pub fn new(pool: SqlitePool, cfg: ServerConfig) -> Self {
        let cfg = Arc::new(cfg);
        let agents = Arc::new(RwLock::new(HashMap::new()));
        let pending = Arc::new(DashMap::new());
        let pending_agents = Arc::new(DashMap::new());
        let streams = Arc::new(DashMap::new());

        Self {
            pool,
            cfg,
            agents,
            pending,
            pending_agents,
            streams,
        }
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,226 @@
use crate::{
error::AppError,
state::{AppState, TaskReply},
};
use chrono::Utc;
use lightops_common::protocol::ServerMessage;
use serde_json::Value;
use tokio::sync::oneshot;
use uuid::Uuid;
/// Sends a task to an agent and waits for its reply, using the timeout from
/// the server configuration (`task_timeout_secs`).
///
/// # Errors
/// Propagates every error of [`request_agent_task_with_timeout`].
pub async fn request_agent_task(
    state: &AppState,
    agent_id: &str,
    user_id: Option<i64>,
    action: &str,
    params: Value,
) -> Result<TaskReply, AppError> {
    let timeout_secs = state.cfg.task_timeout_secs;
    request_agent_task_with_timeout(state, agent_id, user_id, action, params, timeout_secs).await
}
/// Sends a task to an agent and waits up to `timeout_secs` for its reply.
///
/// The parameters sent to the agent are also the ones persisted with the
/// task record.
///
/// # Errors
/// Propagates every error of
/// [`request_agent_task_with_timeout_and_stored_params`].
pub async fn request_agent_task_with_timeout(
    state: &AppState,
    agent_id: &str,
    user_id: Option<i64>,
    action: &str,
    params: Value,
    timeout_secs: u64,
) -> Result<TaskReply, AppError> {
    // Persist an identical copy of what is sent over the wire.
    let stored_params = params.clone();
    request_agent_task_with_timeout_and_stored_params(
        state,
        agent_id,
        user_id,
        action,
        params,
        stored_params,
        timeout_secs,
    )
    .await
}
/// Records a task, dispatches it to an agent and waits for the reply.
///
/// `params` is sent to the agent, while `stored_params` is what gets persisted
/// in the `tasks` table; the two may differ.
///
/// The task row is inserted with status `running` and is moved to a final
/// status (`success`, `failed` or `timeout`) on every exit path that follows
/// the agent lookup.
///
/// # Errors
/// - [`AppError::AgentOffline`] if the agent is not connected, the message
///   cannot be queued, or the reply channel is closed before a reply arrives.
/// - [`AppError::Timeout`] if no reply arrives within `timeout_secs`.
/// - Any database error raised while recording the task or its events.
pub async fn request_agent_task_with_timeout_and_stored_params(
    state: &AppState,
    agent_id: &str,
    user_id: Option<i64>,
    action: &str,
    params: Value,
    stored_params: Value,
    timeout_secs: u64,
) -> Result<TaskReply, AppError> {
    let task_id = Uuid::new_v4().to_string();
    sqlx::query(
        "INSERT INTO tasks (id, agent_id, user_id, action, params_json, status, started_at) VALUES (?, ?, ?, ?, ?, 'running', ?)",
    )
    .bind(&task_id)
    .bind(agent_id)
    .bind(user_id)
    .bind(action)
    .bind(stored_params.to_string())
    .bind(Utc::now().to_rfc3339())
    .execute(&state.pool)
    .await?;
    append_task_event(
        state,
        &task_id,
        "info",
        "任务已创建",
        Some(&serde_json::json!({ "action": action })),
    )
    .await?;

    // Clone the handle inside a short scope so the read guard is released
    // before any further await.
    let handle = {
        let agents = state.agents.read().await;
        agents.get(agent_id).cloned()
    };
    let handle = match handle {
        Some(handle) => handle,
        None => {
            // The row was already inserted as 'running'; without this it would
            // stay 'running' forever because nothing else finalizes it.
            mark_task(state, &task_id, "failed", None, Some("Agent 不在线")).await?;
            return Err(AppError::AgentOffline);
        }
    };

    // Register the reply channel before sending so a fast reply cannot be lost.
    let (tx, rx) = oneshot::channel();
    state.pending.insert(task_id.clone(), tx);
    state
        .pending_agents
        .insert(task_id.clone(), agent_id.to_string());

    let msg = ServerMessage::TaskRequest {
        task_id: task_id.clone(),
        action: action.to_string(),
        params,
    };
    if handle.tx.send(msg).is_err() {
        state.pending.remove(&task_id);
        state.pending_agents.remove(&task_id);
        mark_task(state, &task_id, "failed", None, Some("Agent 已断开连接")).await?;
        return Err(AppError::AgentOffline);
    }
    append_task_event(state, &task_id, "info", "任务已下发到 Agent", None).await?;

    let reply = match tokio::time::timeout(std::time::Duration::from_secs(timeout_secs), rx).await {
        Ok(Ok(reply)) => reply,
        Ok(Err(_)) => {
            // The sender was dropped without a reply.
            state.pending_agents.remove(&task_id);
            mark_task(state, &task_id, "failed", None, Some("Agent 已断开连接")).await?;
            return Err(AppError::AgentOffline);
        }
        Err(_) => {
            state.pending.remove(&task_id);
            state.pending_agents.remove(&task_id);
            mark_task(state, &task_id, "timeout", None, Some("任务超时")).await?;
            return Err(AppError::Timeout);
        }
    };

    // Idempotent cleanup: whoever delivered the reply may already have removed
    // this entry; removing it again is a no-op and guards against a leak.
    state.pending_agents.remove(&task_id);

    let status = if reply.success { "success" } else { "failed" };
    mark_task(
        state,
        &task_id,
        status,
        Some(&reply.data),
        reply.error.as_deref(),
    )
    .await?;
    Ok(reply)
}
/// Appends one entry to the `task_events` log of a task.
///
/// `message` is truncated to its first 500 characters; `data`, when present,
/// is stored as serialized JSON.
///
/// # Errors
/// Returns an error if the insert fails.
pub async fn append_task_event(
    state: &AppState,
    task_id: &str,
    level: &str,
    message: &str,
    data: Option<&Value>,
) -> Result<(), AppError> {
    // Truncate by characters, not bytes, so multi-byte text is never split.
    let truncated: String = message.chars().take(500).collect();
    let data_json = data.map(|value| value.to_string());

    let insert = sqlx::query(
        "INSERT INTO task_events(task_id, level, message, data_json, created_at) VALUES(?, ?, ?, ?, ?)",
    )
    .bind(task_id)
    .bind(level)
    .bind(truncated)
    .bind(data_json)
    .bind(Utc::now().to_rfc3339());

    insert.execute(&state.pool).await?;
    Ok(())
}
/// Fails every in-flight task that was dispatched to `agent_id`.
///
/// Each matching task is removed from the pending registries, its waiter (if
/// any) receives an unsuccessful [`TaskReply`] carrying `reason`, and the task
/// row is marked `failed`. Errors while marking a task are ignored.
pub async fn fail_agent_pending_tasks(state: &AppState, agent_id: &str, reason: &str) {
    // Collect the ids first: the map must not be mutated while it is iterated.
    let mut orphaned = Vec::new();
    for entry in state.pending_agents.iter() {
        if entry.value() == agent_id {
            orphaned.push(entry.key().clone());
        }
    }

    for task_id in orphaned {
        state.pending_agents.remove(&task_id);

        if let Some((_, waiter)) = state.pending.remove(&task_id) {
            let reply = TaskReply {
                success: false,
                data: serde_json::json!({}),
                error: Some(reason.to_string()),
            };
            // The requester may already have gone away; that is fine.
            let _ = waiter.send(reply);
        }

        let _ = mark_task(state, &task_id, "failed", None, Some(reason)).await;
    }
}
/// Writes the final status of a task and logs a matching task event.
///
/// Updates `status`, `result_json`, `error` and `finished_at` on the task row,
/// then appends an event: level `info` for `success` and `error` for every
/// other status.
///
/// # Errors
/// Returns an error if the update or the event insert fails.
pub async fn mark_task(
    state: &AppState,
    task_id: &str,
    status: &str,
    result: Option<&Value>,
    error: Option<&str>,
) -> Result<(), AppError> {
    let result_json = result.map(|v| v.to_string());
    sqlx::query(
        "UPDATE tasks SET status = ?, result_json = ?, error = ?, finished_at = ? WHERE id = ?",
    )
    .bind(status)
    .bind(result_json)
    .bind(error)
    .bind(Utc::now().to_rfc3339())
    .bind(task_id)
    .execute(&state.pool)
    .await?;

    // Only "success" is informational; anything else is reported as an error.
    let (level, message) = match status {
        "success" => ("info", "任务成功完成"),
        "timeout" => ("error", "任务执行超时"),
        "cancelled" => ("error", "任务已取消"),
        _ => ("error", "任务执行失败"),
    };

    let details = serde_json::json!({
        "status": status,
        "error": error,
        "result": result
    });
    append_task_event(state, task_id, level, message, Some(&details)).await
}
pub async fn audit(
state: &AppState,
user_id: Option<i64>,
agent_id: Option<&str>,
action: &str,
target: Option<&str>,
params_summary: Option<String>,
success: bool,
error: Option<&str>,
) {
let _ = sqlx::query(
"INSERT INTO audit_logs (user_id, agent_id, action, target, params_summary, success, error) VALUES (?, ?, ?, ?, ?, ?, ?)",
)
.bind(user_id)
.bind(agent_id)
.bind(action)
.bind(target)
.bind(params_summary)
.bind(if success { 1 } else { 0 })
.bind(error)
.execute(&state.pool)
.await;
}