1
0
forked from Eeveid/lightOps

实现 LightOps 运维面板基础功能

This commit is contained in:
2026-05-25 01:13:03 +08:00
commit d3bb9f45a6
84 changed files with 23505 additions and 0 deletions

View File

@@ -0,0 +1,30 @@
# Manifest for the `lightops-server` crate.
# Version, edition and license are inherited from the workspace manifest.
[package]
name = "lightops-server"
version.workspace = true
edition.workspace = true
license.workspace = true
# Every third-party dependency takes its version from the workspace;
# `lightops-common` is the only path dependency.
[dependencies]
anyhow.workspace = true
argon2.workspace = true
async-trait.workspace = true
axum.workspace = true
base64.workspace = true
chrono.workspace = true
clap.workspace = true
dashmap.workspace = true
futures-util.workspace = true
jsonwebtoken.workspace = true
lightops-common = { path = "../lightops-common" }
rand.workspace = true
serde.workspace = true
serde_json.workspace = true
sha2.workspace = true
sqlx.workspace = true
thiserror.workspace = true
tokio.workspace = true
toml.workspace = true
tower-http.workspace = true
tracing.workspace = true
tracing-subscriber.workspace = true
uuid.workspace = true

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,146 @@
use crate::{error::AppError, state::AppState};
use argon2::{
password_hash::{PasswordHash, PasswordHasher, PasswordVerifier, SaltString},
Argon2,
};
use async_trait::async_trait;
use axum::{
extract::FromRequestParts,
http::{header::AUTHORIZATION, request::Parts},
};
use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine};
use chrono::{Duration, Utc};
use jsonwebtoken::{decode, encode, DecodingKey, EncodingKey, Header, Validation};
use rand::{rngs::OsRng, RngCore};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
/// The authenticated caller of a request, built from a verified JWT by the
/// `FromRequestParts` implementation in this file.
#[derive(Debug, Clone)]
pub struct AuthUser {
/// User id, taken from the token's `sub` claim.
pub id: i64,
/// User name, taken from the token's `username` claim.
pub username: String,
/// Role name, taken from the token's `role` claim; `"admin"` is treated as
/// all-powerful by `can` and `load_permissions`.
pub role: String,
/// Permission strings granted to the user (`["*"]` for admins, otherwise the
/// rows loaded from `user_permissions`).
pub permissions: Vec<String>,
}
/// JWT payload written by `make_jwt` and read back when authenticating a request.
#[derive(Debug, Serialize, Deserialize)]
struct Claims {
// Id of the user the token was issued for.
sub: i64,
// User name at the time the token was issued.
username: String,
// Role at the time the token was issued; it is not re-read from the database
// when the token is later verified in this file.
role: String,
// Expiry as a Unix timestamp in seconds (`make_jwt` sets it to now + 12 hours).
exp: usize,
}
#[async_trait]
impl FromRequestParts<AppState> for AuthUser {
type Rejection = AppError;
async fn from_request_parts(
parts: &mut Parts,
state: &AppState,
) -> Result<Self, Self::Rejection> {
let header_token = parts
.headers
.get(AUTHORIZATION)
.and_then(|v| v.to_str().ok())
.and_then(|h| h.strip_prefix("Bearer "))
.map(ToString::to_string);
let query_token = parts.uri.query().and_then(|query| {
query.split('&').find_map(|part| {
let (k, v) = part.split_once('=')?;
(k == "token").then(|| v.to_string())
})
});
let token = header_token.or(query_token).ok_or(AppError::Unauthorized)?;
let data = decode::<Claims>(
&token,
&DecodingKey::from_secret(state.cfg.jwt_secret.as_bytes()),
&Validation::default(),
)
.map_err(|_| AppError::Unauthorized)?;
let permissions = load_permissions(state, data.claims.sub, &data.claims.role).await?;
Ok(AuthUser {
id: data.claims.sub,
username: data.claims.username,
role: data.claims.role,
permissions,
})
}
}
/// Loads the permission strings granted to `user_id`.
///
/// Users whose `role` is `"admin"` get the single wildcard permission `"*"`
/// without touching the database. For everyone else the `user_permissions`
/// rows are returned sorted by permission name.
///
/// # Errors
///
/// A database failure is propagated through `From<sqlx::Error> for AppError`,
/// i.e. it is logged and reported as [`AppError::Internal`]. It is deliberately
/// not reported as `Unauthorized`: a database outage is not an expired session
/// and must not be hidden from the logs.
async fn load_permissions(
    state: &AppState,
    user_id: i64,
    role: &str,
) -> Result<Vec<String>, AppError> {
    if role == "admin" {
        return Ok(vec!["*".into()]);
    }
    let permissions = sqlx::query_scalar::<_, String>(
        "SELECT permission FROM user_permissions WHERE user_id = ? ORDER BY permission",
    )
    .bind(user_id)
    .fetch_all(&state.pool)
    .await?;
    Ok(permissions)
}
impl AuthUser {
    /// Returns `true` when this user may perform `permission`.
    ///
    /// Admins are always allowed. Otherwise a granted entry matches when it is
    /// - the wildcard `"*"`,
    /// - exactly equal to `permission`,
    /// - a parent of it (`"apps"` grants `"apps.read"`), or
    /// - a trailing-`*` glob whose stem is a prefix of it (`"logs*"` grants
    ///   `"logstash"`, `"files.*"` grants `"files.write"`).
    ///
    /// The check is allocation-free: the stem is stripped once and the
    /// remainder is inspected, instead of formatting a new `String` per entry.
    pub fn can(&self, permission: &str) -> bool {
        if self.role == "admin" {
            return true;
        }
        self.permissions.iter().any(|granted| {
            if granted == "*" || granted == permission {
                return true;
            }
            let is_glob = granted.ends_with('*');
            let stem = granted.trim_end_matches('*');
            permission
                .strip_prefix(stem)
                // A glob accepts any continuation; a plain entry only accepts
                // a dotted child such as `stem.child`.
                .is_some_and(|rest| is_glob || rest.starts_with('.'))
        })
    }
}
/// Hashes `password` with Argon2 (default parameters) and a freshly generated
/// random salt, returning the encoded hash string.
///
/// # Errors
///
/// Returns an error carrying the hasher's message when hashing fails.
pub fn hash_password(password: &str) -> anyhow::Result<String> {
    let salt = SaltString::generate(&mut OsRng);
    let hasher = Argon2::default();
    match hasher.hash_password(password.as_bytes(), &salt) {
        Ok(encoded) => Ok(encoded.to_string()),
        Err(err) => Err(anyhow::anyhow!("密码哈希失败: {err}")),
    }
}
/// Checks `password` against an encoded Argon2 `hash`.
///
/// Returns `false` both when the hash string cannot be parsed and when the
/// password does not match.
pub fn verify_password(password: &str, hash: &str) -> bool {
    match PasswordHash::new(hash) {
        Ok(parsed) => Argon2::default()
            .verify_password(password.as_bytes(), &parsed)
            .is_ok(),
        Err(_) => false,
    }
}
/// Issues a JWT for `user`, signed with `secret` and the default header.
///
/// The token carries the user's id, name and role and expires 12 hours after
/// it is issued.
///
/// # Errors
///
/// Returns the encoder's error when the token cannot be produced.
pub fn make_jwt(user: &AuthUser, secret: &str) -> anyhow::Result<String> {
    let expires_at = Utc::now() + Duration::hours(12);
    let claims = Claims {
        sub: user.id,
        username: user.username.clone(),
        role: user.role.clone(),
        exp: expires_at.timestamp() as usize,
    };
    let key = EncodingKey::from_secret(secret.as_bytes());
    let token = encode(&Header::default(), &claims, &key)?;
    Ok(token)
}
/// Generates a random token: 32 bytes from the OS random source, encoded as
/// unpadded URL-safe base64.
pub fn random_token() -> String {
    let mut entropy = [0u8; 32];
    let mut rng = OsRng;
    rng.fill_bytes(&mut entropy);
    URL_SAFE_NO_PAD.encode(entropy)
}
/// Returns the SHA-256 digest of `token`, encoded as unpadded URL-safe base64.
pub fn token_hash(token: &str) -> String {
    let digest = Sha256::digest(token.as_bytes());
    URL_SAFE_NO_PAD.encode(digest)
}

View File

@@ -0,0 +1,38 @@
use anyhow::Result;
use serde::Deserialize;
use std::{fs, path::Path};
/// Server settings, deserialized from a TOML file by `ServerConfig::load`.
///
/// Every field must be present in the file: the struct has no
/// `#[serde(default)]`, so the `Default` values only apply when the file
/// itself is missing.
#[derive(Debug, Clone, Deserialize)]
pub struct ServerConfig {
/// Socket address the HTTP server listens on (parsed as a `SocketAddr` in `main`).
pub bind: String,
/// Connection string passed to the SQLite pool.
pub database_url: String,
/// Secret used to sign and verify JWTs.
pub jwt_secret: String,
/// Externally reachable base URL of this server.
/// NOTE(review): its consumers are not visible in this file — confirm usage.
pub public_url: String,
/// Directory holding the built web front end served as static files.
pub static_dir: String,
/// Lifetime of a registration token, in minutes.
/// NOTE(review): its consumers are not visible in this file — confirm usage.
pub registration_token_ttl_minutes: i64,
/// Default number of seconds to wait for an agent to answer a task.
pub task_timeout_secs: u64,
}
/// Built-in settings used when no configuration file exists.
///
/// The default `jwt_secret` is a placeholder and is not safe for production.
impl Default for ServerConfig {
    fn default() -> Self {
        let bind = String::from("0.0.0.0:8080");
        let database_url = String::from("sqlite://lightops.db?mode=rwc");
        let jwt_secret = String::from("change-me-in-production");
        let public_url = String::from("http://127.0.0.1:8080");
        let static_dir = String::from("web/dist");
        Self {
            bind,
            database_url,
            jwt_secret,
            public_url,
            static_dir,
            registration_token_ttl_minutes: 30,
            task_timeout_secs: 20,
        }
    }
}
impl ServerConfig {
    /// Loads the configuration from the TOML file at `path`.
    ///
    /// When the file does not exist the built-in defaults are returned.
    ///
    /// # Errors
    ///
    /// Returns an error when the file exists but cannot be read (for example
    /// because of missing permissions) or does not contain valid TOML for
    /// this struct.
    pub fn load(path: &str) -> Result<Self> {
        // Read directly instead of probing with `exists()` first: that avoids a
        // check-then-read race and keeps errors such as "permission denied"
        // from being mistaken for "no config file".
        match fs::read_to_string(Path::new(path)) {
            Ok(raw) => Ok(toml::from_str(&raw)?),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(Self::default()),
            Err(err) => Err(err.into()),
        }
    }
}

View File

@@ -0,0 +1,7 @@
use anyhow::Result;
use sqlx::SqlitePool;
/// Applies the SQL migrations embedded from `../../migrations` to `pool`.
///
/// # Errors
///
/// Returns the migrator's error when a migration fails.
pub async fn migrate(pool: &SqlitePool) -> Result<()> {
    let migrator = sqlx::migrate!("../../migrations");
    migrator.run(pool).await?;
    Ok(())
}

View File

@@ -0,0 +1,51 @@
use axum::{http::StatusCode, response::IntoResponse, Json};
use lightops_common::api::ApiResponse;
use thiserror::Error;
/// Errors returned by request handlers. Each variant maps to an HTTP status
/// in the `IntoResponse` implementation, and its `#[error]` text becomes the
/// message of the JSON error body.
#[derive(Debug, Error)]
pub enum AppError {
/// Missing, invalid or expired credentials (HTTP 401).
#[error("未登录或登录已失效")]
Unauthorized,
/// The caller is authenticated but not allowed to do this (HTTP 403).
#[error("无权执行此操作")]
Forbidden,
/// The requested resource does not exist (HTTP 404).
#[error("资源不存在")]
NotFound,
/// The request was malformed; the payload describes the problem (HTTP 400).
#[error("请求参数错误:{0}")]
BadRequest(String),
/// The target agent has no live connection (HTTP 503).
#[error("Agent 不在线")]
AgentOffline,
/// The operation did not finish in time (HTTP 504).
#[error("操作超时")]
Timeout,
/// Any unexpected server-side failure (HTTP 500).
#[error("服务器内部错误")]
Internal,
}
/// Renders an [`AppError`] as an HTTP response: the status code is chosen per
/// variant and the body is a JSON `ApiResponse::err` carrying the error's
/// display text.
impl IntoResponse for AppError {
    fn into_response(self) -> axum::response::Response {
        let message = self.to_string();
        let status = match self {
            Self::Unauthorized => StatusCode::UNAUTHORIZED,
            Self::Forbidden => StatusCode::FORBIDDEN,
            Self::NotFound => StatusCode::NOT_FOUND,
            Self::BadRequest(_) => StatusCode::BAD_REQUEST,
            Self::AgentOffline => StatusCode::SERVICE_UNAVAILABLE,
            Self::Timeout => StatusCode::GATEWAY_TIMEOUT,
            Self::Internal => StatusCode::INTERNAL_SERVER_ERROR,
        };
        let body = Json(ApiResponse::err(message));
        (status, body).into_response()
    }
}
/// Converts a database error into [`AppError::Internal`], logging the
/// original error so its details are not lost.
impl From<sqlx::Error> for AppError {
    fn from(err: sqlx::Error) -> Self {
        // The binding must stay named `err`: it is also the log field name.
        tracing::error!(?err, "database error");
        Self::Internal
    }
}
/// Converts a generic application error into [`AppError::Internal`], logging
/// the original error so its details are not lost.
impl From<anyhow::Error> for AppError {
    fn from(err: anyhow::Error) -> Self {
        // The binding must stay named `err`: it is also the log field name.
        tracing::error!(?err, "application error");
        Self::Internal
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,121 @@
mod apps;
mod auth;
mod config;
mod db;
mod error;
mod handlers;
mod maintenance;
mod state;
mod store;
mod task;
use anyhow::Result;
use axum::{http::header, response::IntoResponse, routing::get, Router};
use clap::Parser;
use config::ServerConfig;
use sqlx::sqlite::SqlitePoolOptions;
use std::net::SocketAddr;
use tokio::net::TcpListener;
use tower_http::{
cors::CorsLayer,
services::{ServeDir, ServeFile},
trace::TraceLayer,
};
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
// Command-line arguments of the server binary.
// Plain `//` comments are used on purpose: clap turns `///` doc comments into
// help text, which would change the program's `--help` output.
#[derive(Debug, Parser)]
struct Args {
// Path of the TOML configuration file (`--config`).
#[arg(long, default_value = "config/server.toml")]
config: String,
}
/// Server entry point: sets up logging, loads the configuration, opens and
/// migrates the database, starts the background maintenance task and serves
/// the API, the agent scripts and the static front end until a shutdown
/// signal arrives.
#[tokio::main]
async fn main() -> Result<()> {
    // Log filter comes from the environment, falling back to "info".
    tracing_subscriber::registry()
        .with(
            tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| "info".into()),
        )
        .with(tracing_subscriber::fmt::layer())
        .init();

    let args = Args::parse();
    let cfg = ServerConfig::load(&args.config)?;
    // The built-in secret is public knowledge; anyone could mint valid tokens
    // with it, so make running with it impossible to overlook.
    if cfg.jwt_secret == ServerConfig::default().jwt_secret {
        tracing::warn!("jwt_secret 仍为默认值,登录令牌可被伪造,请在配置文件中设置随机密钥");
    }

    let pool = SqlitePoolOptions::new()
        .max_connections(8)
        .connect(&cfg.database_url)
        .await?;
    db::migrate(&pool).await?;

    let state = state::AppState::new(pool, cfg.clone());
    maintenance::spawn(state.clone());

    // Unknown paths fall back to `index.html` so client-side routes resolve.
    // The path is joined (not string-formatted) so separators are handled
    // correctly on every platform and for any `static_dir` value.
    let static_dir = std::path::Path::new(&cfg.static_dir);
    let app = api_router(state.clone())
        .route("/install-agent.sh", get(install_agent_script))
        .route("/upgrade-agent.sh", get(upgrade_agent_script))
        .route("/uninstall-agent.sh", get(uninstall_agent_script))
        .fallback_service(
            ServeDir::new(static_dir)
                .append_index_html_on_directories(true)
                .not_found_service(ServeFile::new(static_dir.join("index.html"))),
        )
        // NOTE(review): permissive CORS accepts every origin — confirm this is
        // intended for an operations panel.
        .layer(CorsLayer::permissive())
        .layer(TraceLayer::new_for_http());

    let addr: SocketAddr = cfg.bind.parse()?;
    let listener = TcpListener::bind(addr).await?;
    tracing::info!("LightOps 服务已监听 {}", addr);
    axum::serve(
        listener,
        app.into_make_service_with_connect_info::<SocketAddr>(),
    )
    .with_graceful_shutdown(shutdown_signal())
    .await?;
    Ok(())
}
/// Builds the router that exposes every API handler under `/api`, bound to
/// the shared application `state`.
fn api_router(state: state::AppState) -> Router {
    let api = handlers::router();
    Router::new().nest("/api", api).with_state(state)
}
/// Wraps an embedded shell script in a response with the shell-script
/// content type shared by all agent script endpoints.
fn shell_script(body: &'static str) -> impl IntoResponse {
    (
        [(header::CONTENT_TYPE, "text/x-shellscript; charset=utf-8")],
        body,
    )
}

/// Serves the agent installation script embedded at compile time.
async fn install_agent_script() -> impl IntoResponse {
    shell_script(include_str!("../../../scripts/install-agent.sh"))
}

/// Serves the agent upgrade script embedded at compile time.
async fn upgrade_agent_script() -> impl IntoResponse {
    shell_script(include_str!("../../../scripts/upgrade-agent.sh"))
}

/// Serves the agent removal script embedded at compile time.
async fn uninstall_agent_script() -> impl IntoResponse {
    shell_script(include_str!("../../../scripts/uninstall-agent.sh"))
}
/// Resolves when the process is asked to stop: on Ctrl-C everywhere, and
/// additionally on SIGTERM on Unix. Used as the graceful-shutdown trigger.
///
/// # Panics
///
/// On Unix, panics if the SIGTERM handler cannot be installed.
async fn shutdown_signal() {
    let ctrl_c = async {
        // `ctrl_c()` returns `Err` when the listener cannot be registered.
        // Finishing this branch in that case would shut the server down the
        // moment it starts, so wait forever instead and leave shutdown to the
        // other branch.
        if tokio::signal::ctrl_c().await.is_err() {
            std::future::pending::<()>().await;
        }
    };
    #[cfg(unix)]
    let terminate = async {
        let mut signal = tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
            .expect("install signal handler");
        signal.recv().await;
    };
    // Other platforms have no SIGTERM; this branch simply never fires.
    #[cfg(not(unix))]
    let terminate = std::future::pending::<()>();
    tokio::select! {
        _ = ctrl_c => {},
        _ = terminate => {},
    }
}

View File

@@ -0,0 +1,70 @@
use crate::state::AppState;
use chrono::{Duration, Utc};
use sqlx::Row;
/// Starts the background maintenance loop on the Tokio runtime.
///
/// The loop runs one maintenance pass immediately and then once every
/// 60 seconds. A failing pass is logged as a warning and does not stop the
/// loop.
pub fn spawn(state: AppState) {
    tokio::spawn(async move {
        let mut interval = tokio::time::interval(std::time::Duration::from_secs(60));
        // If a pass overruns the period, wait a full period before the next one
        // instead of firing the missed ticks back-to-back (the default).
        interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
        loop {
            interval.tick().await;
            if let Err(err) = run_once(&state).await {
                tracing::warn!(?err, "后台维护任务执行失败");
            }
        }
    });
}
/// Runs a single maintenance pass: marks stale agents offline, prunes old
/// metrics, then runs the scheduled application health checks.
///
/// The steps run in this order and the first error aborts the remaining ones.
async fn run_once(state: &AppState) -> anyhow::Result<()> {
mark_stale_agents_offline(state).await?;
prune_old_metrics(state).await?;
crate::apps::run_scheduled_health_checks(state).await?;
Ok(())
}
/// Marks agents as offline when their last heartbeat is too old.
///
/// The threshold is the `agent.offline_after_seconds` setting (default 120,
/// never less than 30). For each stale agent the live connection handle is
/// dropped, its pending tasks are failed, and its database row is set to
/// `offline`.
async fn mark_stale_agents_offline(state: &AppState) -> anyhow::Result<()> {
    let configured = setting_i64(state, "agent.offline_after_seconds", 120).await;
    let threshold_secs = configured.max(30);
    // Timestamps are compared as RFC 3339 text.
    let cutoff = (Utc::now() - Duration::seconds(threshold_secs)).to_rfc3339();

    let stale = sqlx::query(
        "SELECT id FROM agents WHERE status = 'online' AND last_seen_at IS NOT NULL AND last_seen_at < ?",
    )
    .bind(&cutoff)
    .fetch_all(&state.pool)
    .await?;

    for row in stale {
        let agent_id = row.get::<String, _>("id");
        {
            // Keep the write lock only for the removal itself.
            let mut agents = state.agents.write().await;
            agents.remove(&agent_id);
        }
        crate::task::fail_agent_pending_tasks(state, &agent_id, "Agent 心跳超时").await;
        let now = Utc::now().to_rfc3339();
        sqlx::query("UPDATE agents SET status = 'offline', updated_at = ? WHERE id = ?")
            .bind(now)
            .bind(&agent_id)
            .execute(&state.pool)
            .await?;
    }
    Ok(())
}
/// Deletes agent metrics older than the retention window.
///
/// The window is the `metrics.retention_days` setting (default 30, never less
/// than 1 day).
// NOTE(review): the cutoff is produced by SQLite's `datetime('now', ...)`;
// this assumes `created_at` is stored in a text format that compares
// correctly against it — confirm against the schema.
async fn prune_old_metrics(state: &AppState) -> anyhow::Result<()> {
    let configured = setting_i64(state, "metrics.retention_days", 30).await;
    let retention_days = configured.max(1);
    let modifier = format!("-{retention_days} days");
    let delete = sqlx::query("DELETE FROM agent_metrics WHERE created_at < datetime('now', ?)")
        .bind(modifier);
    delete.execute(&state.pool).await?;
    Ok(())
}
/// Reads the integer setting `key` from the `settings` table.
///
/// Returns `default` when the row is missing, the query fails, or the stored
/// value is not a valid `i64`.
async fn setting_i64(state: &AppState, key: &str, default: i64) -> i64 {
    let stored = sqlx::query_scalar::<_, String>("SELECT value FROM settings WHERE key = ?")
        .bind(key)
        .fetch_optional(&state.pool)
        .await;
    match stored {
        Ok(Some(raw)) => raw.parse::<i64>().unwrap_or(default),
        _ => default,
    }
}

View File

@@ -0,0 +1,43 @@
use crate::config::ServerConfig;
use dashmap::DashMap;
use lightops_common::protocol::{AgentMessage, ServerMessage};
use serde_json::Value;
use sqlx::SqlitePool;
use std::{collections::HashMap, sync::Arc};
use tokio::sync::{mpsc, oneshot, RwLock};
/// Shared application state handed to every handler and background task.
/// Cloning is cheap: the pool is a handle and all other fields are `Arc`s.
#[derive(Clone)]
pub struct AppState {
/// SQLite connection pool.
pub pool: SqlitePool,
/// Immutable server configuration.
pub cfg: Arc<ServerConfig>,
/// Live agent connections, keyed by agent id.
pub agents: Arc<RwLock<HashMap<String, AgentHandle>>>,
/// Reply channels of in-flight tasks, keyed by task id.
pub pending: Arc<DashMap<String, oneshot::Sender<TaskReply>>>,
/// Maps each in-flight task id to the id of the agent executing it.
pub pending_agents: Arc<DashMap<String, String>>,
/// Senders for forwarding streamed agent messages.
/// NOTE(review): the key's meaning is not visible in this file — confirm.
pub streams: Arc<DashMap<String, mpsc::UnboundedSender<AgentMessage>>>,
}
/// Handle to one connected agent.
#[derive(Clone)]
pub struct AgentHandle {
/// Identifier of the connection behind this handle.
/// NOTE(review): presumably used to tell a reconnect apart from the old
/// connection — confirm against the connection handler.
pub connection_id: String,
/// Channel used to send messages from the server to the agent.
pub tx: mpsc::UnboundedSender<ServerMessage>,
}
/// Outcome of a task that was sent to an agent.
#[derive(Debug)]
pub struct TaskReply {
/// Whether the task succeeded.
pub success: bool,
/// Result payload; stored as the task's `result_json`.
pub data: Value,
/// Error description for a failed task, if any.
pub error: Option<String>,
}
impl AppState {
pub fn new(pool: SqlitePool, cfg: ServerConfig) -> Self {
Self {
pool,
cfg: Arc::new(cfg),
agents: Arc::new(RwLock::new(HashMap::new())),
pending: Arc::new(DashMap::new()),
pending_agents: Arc::new(DashMap::new()),
streams: Arc::new(DashMap::new()),
}
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,226 @@
use crate::{
error::AppError,
state::{AppState, TaskReply},
};
use chrono::Utc;
use lightops_common::protocol::ServerMessage;
use serde_json::Value;
use tokio::sync::oneshot;
use uuid::Uuid;
/// Sends a task to an agent and waits for its reply, using the configured
/// default timeout (`task_timeout_secs`).
///
/// See [`request_agent_task_with_timeout_and_stored_params`] for the full
/// behaviour and error cases.
pub async fn request_agent_task(
    state: &AppState,
    agent_id: &str,
    user_id: Option<i64>,
    action: &str,
    params: Value,
) -> Result<TaskReply, AppError> {
    let default_timeout = state.cfg.task_timeout_secs;
    request_agent_task_with_timeout(state, agent_id, user_id, action, params, default_timeout)
        .await
}
/// Sends a task to an agent and waits up to `timeout_secs` for its reply.
///
/// The same `params` are both sent to the agent and stored with the task
/// record.
pub async fn request_agent_task_with_timeout(
    state: &AppState,
    agent_id: &str,
    user_id: Option<i64>,
    action: &str,
    params: Value,
    timeout_secs: u64,
) -> Result<TaskReply, AppError> {
    let stored_params = params.clone();
    request_agent_task_with_timeout_and_stored_params(
        state,
        agent_id,
        user_id,
        action,
        params,
        stored_params,
        timeout_secs,
    )
    .await
}
/// Records a task, sends it to an agent and waits for the reply.
///
/// `params` is what the agent receives; `stored_params` is what is written to
/// the task record (callers can pass a redacted copy). The task row is created
/// with status `running` and is always finalised before this function
/// returns after the agent lookup: `success`/`failed` from the reply,
/// `failed` when the agent is offline or disconnects, `timeout` when no reply
/// arrives within `timeout_secs`.
///
/// # Errors
///
/// * [`AppError::AgentOffline`] — the agent is not connected or its connection
///   dropped before a reply arrived.
/// * [`AppError::Timeout`] — no reply within `timeout_secs`.
/// * [`AppError::Internal`] — a database operation failed.
pub async fn request_agent_task_with_timeout_and_stored_params(
    state: &AppState,
    agent_id: &str,
    user_id: Option<i64>,
    action: &str,
    params: Value,
    stored_params: Value,
    timeout_secs: u64,
) -> Result<TaskReply, AppError> {
    let task_id = Uuid::new_v4().to_string();
    sqlx::query(
        "INSERT INTO tasks (id, agent_id, user_id, action, params_json, status, started_at) VALUES (?, ?, ?, ?, ?, 'running', ?)",
    )
    .bind(&task_id)
    .bind(agent_id)
    .bind(user_id)
    .bind(action)
    .bind(stored_params.to_string())
    .bind(Utc::now().to_rfc3339())
    .execute(&state.pool)
    .await?;
    append_task_event(
        state,
        &task_id,
        "info",
        "任务已创建",
        Some(&serde_json::json!({ "action": action })),
    )
    .await?;

    // Clone the handle so the read lock is released before any further await.
    let handle = {
        let agents = state.agents.read().await;
        agents.get(agent_id).cloned()
    };
    let Some(handle) = handle else {
        // The row was already inserted as `running`; finalise it so it does not
        // stay "running" forever when the agent is simply not connected.
        mark_task(state, &task_id, "failed", None, Some("Agent 不在线")).await?;
        return Err(AppError::AgentOffline);
    };

    // Register the reply channel before sending so a fast reply cannot be lost.
    let (tx, rx) = oneshot::channel();
    state.pending.insert(task_id.clone(), tx);
    state
        .pending_agents
        .insert(task_id.clone(), agent_id.to_string());
    let msg = ServerMessage::TaskRequest {
        task_id: task_id.clone(),
        action: action.to_string(),
        params,
    };
    if handle.tx.send(msg).is_err() {
        state.pending.remove(&task_id);
        state.pending_agents.remove(&task_id);
        mark_task(state, &task_id, "failed", None, Some("Agent 已断开连接")).await?;
        return Err(AppError::AgentOffline);
    }
    if let Err(err) = append_task_event(state, &task_id, "info", "任务已下发到 Agent", None).await
    {
        // We are about to stop waiting; do not leave tracking entries behind.
        state.pending.remove(&task_id);
        state.pending_agents.remove(&task_id);
        return Err(err);
    }

    let outcome = tokio::time::timeout(std::time::Duration::from_secs(timeout_secs), rx).await;
    // Whatever happened, this task is no longer in flight. Removal is
    // idempotent, so it is safe even if the reply path already cleared these.
    state.pending.remove(&task_id);
    state.pending_agents.remove(&task_id);

    let reply = match outcome {
        Ok(Ok(reply)) => reply,
        // The sender was dropped without a reply.
        Ok(Err(_)) => {
            mark_task(state, &task_id, "failed", None, Some("Agent 已断开连接")).await?;
            return Err(AppError::AgentOffline);
        }
        Err(_) => {
            mark_task(state, &task_id, "timeout", None, Some("任务超时")).await?;
            return Err(AppError::Timeout);
        }
    };

    let status = if reply.success { "success" } else { "failed" };
    mark_task(
        state,
        &task_id,
        status,
        Some(&reply.data),
        reply.error.as_deref(),
    )
    .await?;
    Ok(reply)
}
/// Appends one entry to a task's event log.
///
/// `message` is truncated to its first 500 characters; `data`, when present,
/// is stored as JSON text. The entry is timestamped with the current UTC time.
///
/// # Errors
///
/// Returns [`AppError::Internal`] when the insert fails.
pub async fn append_task_event(
    state: &AppState,
    task_id: &str,
    level: &str,
    message: &str,
    data: Option<&Value>,
) -> Result<(), AppError> {
    let truncated: String = message.chars().take(500).collect();
    let data_json = data.map(|payload| payload.to_string());
    sqlx::query(
        "INSERT INTO task_events(task_id, level, message, data_json, created_at) VALUES(?, ?, ?, ?, ?)",
    )
    .bind(task_id)
    .bind(level)
    .bind(truncated)
    .bind(data_json)
    .bind(Utc::now().to_rfc3339())
    .execute(&state.pool)
    .await?;
    Ok(())
}
/// Fails every in-flight task that belongs to `agent_id`.
///
/// Each waiting caller receives an unsuccessful [`TaskReply`] carrying
/// `reason`, and the task record is marked `failed`. Errors while updating a
/// task record are ignored so the remaining tasks are still processed.
pub async fn fail_agent_pending_tasks(state: &AppState, agent_id: &str, reason: &str) {
    // Collect the ids first so no map entry is borrowed while entries are
    // removed or while awaiting.
    let affected: Vec<String> = state
        .pending_agents
        .iter()
        .filter_map(|entry| (entry.value() == agent_id).then(|| entry.key().clone()))
        .collect();

    for task_id in &affected {
        state.pending_agents.remove(task_id);
        if let Some((_, waiter)) = state.pending.remove(task_id) {
            let reply = TaskReply {
                success: false,
                data: serde_json::json!({}),
                error: Some(reason.to_string()),
            };
            // The waiter may already be gone; that is fine.
            let _ = waiter.send(reply);
        }
        let _ = mark_task(state, task_id, "failed", None, Some(reason)).await;
    }
}
/// Finalises a task record and logs the outcome as a task event.
///
/// Writes `status`, the optional JSON `result`, the optional `error` text and
/// the current time as `finished_at`, then appends an event whose level is
/// `info` for `"success"` and `error` for every other status.
///
/// # Errors
///
/// Returns [`AppError::Internal`] when the update or the event insert fails.
pub async fn mark_task(
    state: &AppState,
    task_id: &str,
    status: &str,
    result: Option<&Value>,
    error: Option<&str>,
) -> Result<(), AppError> {
    let result_json = result.map(|value| value.to_string());
    sqlx::query(
        "UPDATE tasks SET status = ?, result_json = ?, error = ?, finished_at = ? WHERE id = ?",
    )
    .bind(status)
    .bind(result_json)
    .bind(error)
    .bind(Utc::now().to_rfc3339())
    .bind(task_id)
    .execute(&state.pool)
    .await?;

    let (level, message) = match status {
        "success" => ("info", "任务成功完成"),
        "timeout" => ("error", "任务执行超时"),
        "cancelled" => ("error", "任务已取消"),
        _ => ("error", "任务执行失败"),
    };
    let details = serde_json::json!({
        "status": status,
        "error": error,
        "result": result
    });
    append_task_event(state, task_id, level, message, Some(&details)).await
}
/// Writes one row to the audit log.
///
/// `success` is stored as `1` or `0`. Recording is best-effort: a failed
/// insert is ignored and never affects the caller.
pub async fn audit(
    state: &AppState,
    user_id: Option<i64>,
    agent_id: Option<&str>,
    action: &str,
    target: Option<&str>,
    params_summary: Option<String>,
    success: bool,
    error: Option<&str>,
) {
    let success_flag = i32::from(success);
    let insert = sqlx::query(
        "INSERT INTO audit_logs (user_id, agent_id, action, target, params_summary, success, error) VALUES (?, ?, ?, ?, ?, ?, ?)",
    )
    .bind(user_id)
    .bind(agent_id)
    .bind(action)
    .bind(target)
    .bind(params_summary)
    .bind(success_flag)
    .bind(error);
    // Deliberately discard the outcome (see the doc comment).
    let _ = insert.execute(&state.pool).await;
}