Files
tekton/addons/godot_ai/update_reload_runner.gd

533 lines
21 KiB
GDScript

# `@tool` so this script executes inside the editor; it drives EditorInterface.
@tool
extends Node
## EditorSetting key used to defer a self_update telemetry event across the
## disable -> enable boundary. The runner runs while the plugin is disabled,
## so it can't send WebSocket events directly; it writes the outcome here
## and the re-enabled plugin's `_enter_tree` flushes it. See
## `plugin.gd::_flush_pending_self_update_telemetry`.
## The stored value is the JSON string produced by
## `_record_pending_self_update` (a Dictionary with a "status" entry).
const PENDING_SELF_UPDATE_TELEMETRY_KEY := "godot_ai/pending_self_update_event"
## Self-update runner. Owns the install-and-reload sequence from
## `start(zip_path, temp_dir, detached_dock)` onward: extract files into
## `addons/godot_ai/` with rollback bookkeeping, scan the filesystem,
## re-enable the plugin, and clean up the detached dock.
##
## Single-phase install: writes the full `_new_file_paths +
## _existing_file_paths` set before issuing exactly one
## `EditorFileSystem.scan()`. Godot's scan-time reparse pass then sees one
## consistent v(N+1) snapshot, so new files and existing files can resolve
## each other's same-release API changes regardless of parse order.
##
## Not owned here: HTTP download (in `utils/update_manager.gd`), banner UI
## (in `mcp_dock.gd`), or server stop prep (called by
## `plugin.gd::install_downloaded_update` before this runner starts via
## `_lifecycle.prepare_for_update_reload()`).
##
## This node is deliberately tiny and not parented under the EditorPlugin:
## it survives `set_plugin_enabled(false)`, extracts the downloaded release,
## waits for Godot's filesystem scan, then enables the plugin again. The old
## dock is detached before this runner starts, kept alive while deferred
## Callables drain, and freed only after the new plugin instance is loaded.

## Plugin config path handed to `EditorInterface.set_plugin_enabled` whenever
## the runner disables or (re-)enables the plugin.
const PLUGIN_CFG_PATH := "res://addons/godot_ai/plugin.cfg"
## Frames `start()` waits before `_disable_old_plugin` runs.
const PRE_DISABLE_DRAIN_FRAMES := 8
## Frames waited after disabling the plugin before `_extract_and_scan` runs.
const POST_DISABLE_DRAIN_FRAMES := 2
## Frames waited before `_cleanup_and_finish` frees the detached dock and
## this runner. Used after every re-enable and also on the FAILED_MIXED path,
## where the plugin is left disabled.
const POST_ENABLE_FREE_FRAMES := 8
## Base location that zip entry paths are joined onto (after
## `ProjectSettings.globalize_path`) to form on-disk target paths.
const INSTALL_BASE_PATH := "res://"
## Only zip entries whose path begins with this prefix are installed; every
## other entry is skipped by `_read_update_manifest`.
const ZIP_ADDON_PREFIX := "addons/godot_ai/"
## Suffix appended to a target path to name the staging file that receives
## the new content before it is renamed over the target.
const TEMP_FILE_SUFFIX := ".godot_ai_update_tmp"
## Suffix appended to a target path to name the copy of the pre-update file
## kept for rollback.
const INSTALL_BACKUP_SUFFIX := ".update_backup"
## Outcome of `_install_zip_paths`. `OK` means all listed files were replaced.
## `FAILED_CLEAN` means a write/rename failed mid-batch but every previously
## written file was rolled back to its vN content (or removed, if the file
## was new in vN+1). `FAILED_MIXED` means rollback itself failed: the addons
## tree contains a mix of vN and vN+1 files. The runner MUST NOT re-enable
## the plugin in the MIXED case — see issue #297 finding #9 for the data-loss
## scenario this guards against.
## Values travel as plain `int`: `_install_zip_paths` and
## `_rollback_paths_written` return them and `_handle_install_failure`
## receives them.
enum InstallStatus { OK, FAILED_CLEAN, FAILED_MIXED }
## Path of the downloaded release zip as given to `start()`; globalized with
## `ProjectSettings.globalize_path` before every use.
var _zip_path := ""
## Directory given to `start()`; `_cleanup_update_temp` removes it after a
## successful install.
var _temp_dir := ""
## Old dock instance given to `start()`; queue-freed by
## `_cleanup_detached_dock`. Untyped on purpose.
var _detached_dock = null
## Makes `start()` a no-op on any call after the first.
var _started := false
## Name of the method `_process` invokes (via `call`) once the frame
## countdown reaches zero. Set by `_wait_frames`.
var _next_step := ""
## Frames left before `_next_step` runs; decremented once per `_process`.
var _frames_remaining := 0
## True between `_start_filesystem_scan` arming its listener and
## `_finish_scan_wait` settling it.
var _waiting_for_scan := false
## Name of the method `_finish_scan_wait` defers to once the scan settles.
var _scan_next_step := ""
## Watchdog for `_start_filesystem_scan`: if Godot's `filesystem_changed`
## signal never fires (slow disk, NFS, AV holding the just-extracted addon
## files open), the runner used to hang in `_waiting_for_scan = true`
## forever and the dock stayed disabled. After this timeout we disconnect
## the signal and proceed anyway — worst case the new files aren't visible
## on the first frame, but they get picked up on the next scan. See
## audit-v2 finding #9 (issue #353).
const SCAN_WATCHDOG_SECS := 30.0
## One-shot Timer child created lazily by `_arm_scan_watchdog`. Untyped to
## match the codebase's defensive pattern for state that survives
## `fs.scan()` during update.
var _scan_watchdog_timer = null
## Sticky flag set by `_on_scan_watchdog_timeout`. Subsequent
## `_start_filesystem_scan` calls in the same update bypass connect+scan
## so a delayed `filesystem_changed` emission from the timed-out scan
## can't fire on a freshly-armed listener for the next scan and falsely
## settle it before that scan actually completed. See PR #381 review for
## the cross-scan race this guards against.
var _scan_timed_out := false
## Keep Array fields untyped: this runner survives fs.scan() during update,
## and typed Variant storage is part of the hot-reload crash class.
## `_read_update_manifest` fills both lists with zip entry paths (still
## carrying the `addons/godot_ai/` prefix): entries whose target did not
## exist on disk go to `_new_file_paths`, the rest to `_existing_file_paths`.
var _new_file_paths = []
var _existing_file_paths = []
## Per-file install records accumulated during install so a later failure
## can roll back files already replaced earlier in the same update.
## Each entry is an untyped Dictionary with target_path / backup_path /
## had_original keys. Cleared by `_finalize_install_success` on full success
## and by `_rollback_paths_written` on failure.
var _paths_written = []
## Set true if `_install_zip_file`'s inner restore-from-backup couldn't
## complete (backup gone, copy failed). The failed file is NOT recorded in
## `_paths_written` because the function bails at that point — without this
## flag, `_rollback_paths_written` would walk only the prior records, all
## restore cleanly, and report FAILED_CLEAN even though the current target
## is missing or stale on disk. Surfaces FAILED_MIXED so the runner refuses
## to re-enable the plugin against a half-installed tree.
var _restore_failed := false
## Test-only opt-out for the scan-watchdog `push_warning` lines. The
## watchdog unit tests in `test_update_reload_runner.gd` invoke
## `_on_scan_watchdog_timeout()` and the post-timeout
## `_start_filesystem_scan` bypass branch directly to pin their behavior
## — but those code paths' `push_warning` calls then appear as yellow
## console noise in every `test_run`, training reviewers to ignore the
## runner's real production warnings. Tests set this true; production
## leaves it false so genuine scan timeouts during a real self-update
## still surface loudly. See issue #413.
var _suppress_scan_warnings := false
## Kick off the update sequence. Stores the zip path, temp directory and
## detached dock, then schedules `_disable_old_plugin` after
## PRE_DISABLE_DRAIN_FRAMES frames. Only the first call has any effect.
func start(zip_path: String, temp_dir: String, detached_dock) -> void:
	if not _started:
		_started = true
		_zip_path = zip_path
		_temp_dir = temp_dir
		_detached_dock = detached_dock
		_wait_frames(PRE_DISABLE_DRAIN_FRAMES, "_disable_old_plugin")
## Frame countdown driver. Each processed frame decrements
## `_frames_remaining`; when it hits zero the pending step is invoked by name.
func _process(_delta: float) -> void:
	if _frames_remaining > 0:
		_frames_remaining -= 1
		if _frames_remaining > 0:
			return
		## Clear the pending step and stop processing *before* calling it:
		## the step itself may schedule another wait via `_wait_frames`.
		var pending_step := _next_step
		_next_step = ""
		set_process(false)
		call(pending_step)
		return
	## Nothing scheduled: make sure per-frame processing is off.
	set_process(false)
## Schedule `next_step` (a method name on this node) to be called by
## `_process` after `frame_count` frames. Counts below 1 are raised to 1.
func _wait_frames(frame_count: int, next_step: String) -> void:
	_frames_remaining = maxi(frame_count, 1)
	_next_step = next_step
	set_process(true)
## First scheduled step after `start()`: turn the plugin off, then wait
## POST_DISABLE_DRAIN_FRAMES frames before `_extract_and_scan`.
func _disable_old_plugin() -> void:
	## The plugin is switched off before any new script is written or
	## scanned. Doing it in this order avoids the Dict/Array field-storage
	## hot-reload crash (#245) as well as cached handler constructor shape
	## mismatches (#247) for plugin-owned instances.
	print("MCP | update runner disabling old plugin")
	EditorInterface.set_plugin_enabled(PLUGIN_CFG_PATH, false)
	_wait_frames(POST_DISABLE_DRAIN_FRAMES, "_extract_and_scan")
## Read the zip manifest, install every listed file in one pass, then start
## the single filesystem scan that precedes re-enabling the plugin.
## A manifest failure re-enables the plugin immediately; an install failure
## is delegated to `_handle_install_failure`.
func _extract_and_scan() -> void:
	if not _read_update_manifest():
		EditorInterface.set_plugin_enabled(PLUGIN_CFG_PATH, true)
		_wait_frames(POST_ENABLE_FREE_FRAMES, "_cleanup_and_finish")
		return
	## New files first, then files that already existed on disk.
	var all_paths: Array = _new_file_paths + _existing_file_paths
	var status := _install_zip_paths(all_paths)
	if status == InstallStatus.OK:
		_finalize_install_success()
		_cleanup_update_temp()
		## A single scan handles both dependency directions. plugin.gd's
		## preloads of new files resolve because those files are already on
		## disk, and new files that reference new members or static-ness
		## changes on existing load-surface scripts resolve because those
		## existing files are already at v(N+1) too. The aim is a consistent
		## snapshot before the scan, not a tree-atomic install; each file is
		## still written via `.tmp` + rename with rollback on failure.
		_start_filesystem_scan("_enable_new_plugin")
	else:
		_handle_install_failure(status)
## Trigger an `EditorFileSystem.scan()` and arrange for `next_step` (default
## and empty-string fallback: "_enable_new_plugin") to run once
## `filesystem_changed` fires or the watchdog expires. When no filesystem is
## available, or an earlier scan already timed out, the step is deferred
## straight away without scanning.
func _start_filesystem_scan(next_step: String = "_enable_new_plugin") -> void:
	var target_step := "_enable_new_plugin" if next_step.is_empty() else next_step
	var editor_fs := EditorInterface.get_resource_filesystem()
	if editor_fs == null:
		call_deferred(target_step)
		return
	if _scan_timed_out:
		## Bypass path. An earlier scan in this update hit the watchdog, so
		## the editor filesystem is unresponsive. Connecting a fresh
		## `filesystem_changed` listener here would race a late emission
		## from that timed-out scan: the one emission fires whatever
		## listener is attached to the shared signal at that moment and
		## would settle this scan before it really finished. So don't wait;
		## Godot's regular background scan catches up once the plugin is
		## enabled again. See PR #381 review.
		if not _suppress_scan_warnings:
			var skip_message := (
				"MCP | skipping filesystem_changed wait after previous timeout (next_step=%s)"
				% target_step
			)
			push_warning(skip_message)
		call_deferred(target_step)
		return
	_scan_next_step = target_step
	_waiting_for_scan = true
	var already_listening := editor_fs.filesystem_changed.is_connected(_on_filesystem_changed)
	if not already_listening:
		editor_fs.filesystem_changed.connect(_on_filesystem_changed, CONNECT_ONE_SHOT)
	_arm_scan_watchdog()
	editor_fs.scan()
## (Re)start the scan watchdog for SCAN_WATCHDOG_SECS seconds. The one-shot
## Timer is created and added as a child on first use and reused afterwards.
func _arm_scan_watchdog() -> void:
	if _scan_watchdog_timer == null:
		var watchdog := Timer.new()
		watchdog.one_shot = true
		watchdog.timeout.connect(_on_scan_watchdog_timeout)
		add_child(watchdog)
		_scan_watchdog_timer = watchdog
	_scan_watchdog_timer.start(SCAN_WATCHDOG_SECS)
## Stop the watchdog timer if one has been created; otherwise do nothing.
func _stop_scan_watchdog() -> void:
	if _scan_watchdog_timer == null:
		return
	_scan_watchdog_timer.stop()
## Watchdog handler: `filesystem_changed` did not arrive within
## SCAN_WATCHDOG_SECS. Ignored when no scan wait is in progress.
func _on_scan_watchdog_timeout() -> void:
	if _waiting_for_scan:
		## Most likely the scan is stuck behind a slow disk / NFS / an AV
		## scanner that is still reading the freshly extracted addon files.
		if not _suppress_scan_warnings:
			var timeout_message := (
				"MCP | filesystem_changed didn't fire within %ds; proceeding without scan confirmation"
				% int(SCAN_WATCHDOG_SECS)
			)
			push_warning(timeout_message)
		## Sticky: later `_start_filesystem_scan` calls in this update skip
		## their connect+scan, otherwise a late emission from this scan
		## could settle the next scan's listener (see PR #381 review).
		_scan_timed_out = true
		## Drop the current listener as well so it cannot call
		## `_finish_scan_wait` a second time if the signal shows up right
		## after the timeout. `_finish_scan_wait` itself is a no-op once
		## `_waiting_for_scan` is false.
		var editor_fs := EditorInterface.get_resource_filesystem()
		if editor_fs != null:
			if editor_fs.filesystem_changed.is_connected(_on_filesystem_changed):
				editor_fs.filesystem_changed.disconnect(_on_filesystem_changed)
		_finish_scan_wait()
## Open the update zip and sort every installable entry into
## `_new_file_paths` (target absent on disk) or `_existing_file_paths`
## (target present). Returns false — after printing the reason — when the zip
## cannot be opened, contains an unsafe path under the addon prefix, or lacks
## `plugin.cfg` / `plugin.gd`.
func _read_update_manifest() -> bool:
	var archive_path := ProjectSettings.globalize_path(_zip_path)
	var base_dir := ProjectSettings.globalize_path(INSTALL_BASE_PATH)
	var archive := ZIPReader.new()
	if archive.open(archive_path) != OK:
		print("MCP | update extract failed: could not open %s" % archive_path)
		return false
	_new_file_paths.clear()
	_existing_file_paths.clear()
	var found_cfg := false
	var found_script := false
	var entries := archive.get_files()
	for entry in entries:
		## Entries outside the addon prefix are ignored entirely.
		if not entry.begins_with(ZIP_ADDON_PREFIX):
			continue
		var relative := entry.trim_prefix(ZIP_ADDON_PREFIX)
		## Directory entries (e.g. a bare `addons/godot_ai/`) are emitted by
		## many zip builders: `zip -r` without `-D`, AssetLib uploads,
		## hand-built archives. They have to be skipped ahead of the safety
		## check, because the empty-segment guard in
		## `_is_safe_zip_addon_file` would reject the bare prefix and abort
		## the whole extract. release.yml currently passes `-D`, but
		## already-installed runners must cope with older or manual zips.
		if relative.is_empty() or entry.ends_with("/"):
			continue
		if not _is_safe_zip_addon_file(entry):
			print("MCP | update extract failed: unsafe zip path %s" % entry)
			archive.close()
			return false
		match relative:
			"plugin.cfg":
				found_cfg = true
			"plugin.gd":
				found_script = true
		## Arrays are shared by reference, so appending through `bucket`
		## appends to the chosen member list.
		var on_disk := FileAccess.file_exists(base_dir.path_join(entry))
		var bucket = _existing_file_paths if on_disk else _new_file_paths
		bucket.append(entry)
	archive.close()
	if not found_cfg:
		print("MCP | update extract failed: zip is missing plugin.cfg")
		return false
	if not found_script:
		print("MCP | update extract failed: zip is missing plugin.gd")
		return false
	return true
## React to a non-OK install status. Records the outcome for deferred
## telemetry, then either re-enables the previous plugin version
## (FAILED_CLEAN) or leaves the plugin disabled with a loud diagnostic
## (FAILED_MIXED). Both paths end by scheduling `_cleanup_and_finish`.
func _handle_install_failure(status: int) -> void:
	var is_mixed := status == InstallStatus.FAILED_MIXED
	var status_label := "failed_mixed" if is_mixed else "failed_clean"
	_record_pending_self_update({
		"status": status_label,
	})
	if not is_mixed:
		## Rollback put every previously written file back, so the previous
		## plugin version can safely be enabled again.
		print("MCP | self-update rolled back; re-enabling previous plugin version")
		EditorInterface.set_plugin_enabled(PLUGIN_CFG_PATH, true)
		_wait_frames(POST_ENABLE_FREE_FRAMES, "_cleanup_and_finish")
		return
	## The addon tree on disk is half-installed; enabling the plugin now
	## would load a mix of vN and vN+1 files. Emit a load-bearing diagnostic
	## and stop without re-enabling — the user has to restore manually. See
	## issue #297 finding #9 for the data-loss scenario.
	var error_text := (
		"MCP | self-update failed mid-install AND rollback could not"
		+ " restore the previous addons/godot_ai/ contents. The plugin"
		+ " is left disabled. Inspect addons/godot_ai/ for"
		+ " *.update_backup / *.godot_ai_update_tmp files and restore"
		+ " manually before re-enabling the plugin."
	)
	push_error(error_text)
	var abort_text := (
		"MCP | self-update aborted: addons/godot_ai/ is in a mixed state;"
		+ " plugin left disabled (manual intervention required)."
	)
	print(abort_text)
	_wait_frames(POST_ENABLE_FREE_FRAMES, "_cleanup_and_finish")
## Return true only for a zip entry path that names a file under
## ZIP_ADDON_PREFIX: not absolute, no backslashes, and no empty, `.` or `..`
## path segment after the prefix.
func _is_safe_zip_addon_file(file_path: String) -> bool:
	var suspicious := file_path.is_absolute_path() or file_path.contains("\\")
	if suspicious or not file_path.begins_with(ZIP_ADDON_PREFIX):
		return false
	var relative := file_path.trim_prefix(ZIP_ADDON_PREFIX)
	if relative.is_empty() or relative.ends_with("/"):
		return false
	var forbidden_segments := ["", ".", ".."]
	for part in relative.split("/", true):
		if part in forbidden_segments:
			return false
	return true
## Install each zip entry in `paths`, appending one record per installed
## file to `_paths_written`. Returns InstallStatus.OK when every entry was
## installed (or `paths` is empty); otherwise returns whatever
## `_rollback_paths_written` reports.
func _install_zip_paths(paths: Array) -> int:
	if paths.is_empty():
		return InstallStatus.OK
	var archive_path := ProjectSettings.globalize_path(_zip_path)
	var archive := ZIPReader.new()
	if archive.open(archive_path) != OK:
		print("MCP | update extract failed: could not reopen %s" % archive_path)
		## No further writes are possible, yet files from earlier in this
		## update may already be on disk — undo those as well.
		return _rollback_paths_written()
	var base_dir := ProjectSettings.globalize_path(INSTALL_BASE_PATH)
	var all_installed := true
	for entry in paths:
		var installed := _install_zip_file(archive, String(entry), base_dir)
		if installed.is_empty():
			all_installed = false
			break
		_paths_written.append(installed)
	## The archive is closed before any rollback work starts.
	archive.close()
	if not all_installed:
		return _rollback_paths_written()
	return InstallStatus.OK
## Install one zip entry: write it to a staging file next to the target,
## copy any existing target to a backup, then rename the staging file over
## the target. Returns a record Dictionary (target_path / backup_path /
## had_original) on success and an empty Dictionary on any failure, after
## printing the reason. Sets `_restore_failed` when a failed replace could
## not put the original file back.
func _install_zip_file(
	reader: ZIPReader, file_path: String, install_base: String
) -> Dictionary:
	var target_path := install_base.path_join(file_path)
	var parent_dir := target_path.get_base_dir()
	if DirAccess.make_dir_recursive_absolute(parent_dir) != OK:
		print("MCP | update extract failed: could not create %s" % parent_dir)
		return {}

	## Stage the new content beside the target; clear any stale staging file.
	var staging_path := target_path + TEMP_FILE_SUFFIX
	DirAccess.remove_absolute(staging_path)
	var payload := reader.read_file(file_path)
	var staging_file := FileAccess.open(staging_path, FileAccess.WRITE)
	if staging_file == null:
		print("MCP | update extract failed: could not write %s (error %d)" % [
			staging_path,
			FileAccess.get_open_error(),
		])
		return {}
	staging_file.store_buffer(payload)
	var write_status := staging_file.get_error()
	staging_file.close()
	if write_status != OK:
		print("MCP | update extract failed: write error %d for %s" % [
			write_status,
			staging_path,
		])
		DirAccess.remove_absolute(staging_path)
		return {}

	## The original is backed up by COPY rather than rename, so the source of
	## truth stays where it is if a later step fails.
	## `_rollback_paths_written` uses the backup when a subsequent file in
	## this batch — or a later batch — cannot be installed.
	var had_original := FileAccess.file_exists(target_path)
	var backup_path := target_path + INSTALL_BACKUP_SUFFIX
	if had_original:
		DirAccess.remove_absolute(backup_path)
		var backup_status := DirAccess.copy_absolute(target_path, backup_path)
		if backup_status != OK:
			DirAccess.remove_absolute(staging_path)
			print("MCP | update extract failed: could not back up %s" % target_path)
			return {}

	var replaced := DirAccess.rename_absolute(staging_path, target_path) == OK
	if not replaced:
		## POSIX and APFS replace atomically, but some filesystems refuse
		## rename-over-existing. Fall back to remove-then-rename so the
		## update can still go through; the common path never exposes a
		## truncated target.
		DirAccess.remove_absolute(target_path)
		replaced = DirAccess.rename_absolute(staging_path, target_path) == OK
	if not replaced:
		DirAccess.remove_absolute(staging_path)
		## The target was removed by the fallback above. Restore it from the
		## COPY backup so the addons dir is back at vN before the failure is
		## reported. The backup is deleted only when that restore copy
		## succeeded; if it did not, target_path is missing and
		## `_restore_failed` makes `_rollback_paths_written` report
		## FAILED_MIXED, so the runner refuses to re-enable the plugin.
		## Keeping the backup also leaves the user a manual recovery path.
		## This file is not tracked in `_paths_written` (an empty Dictionary
		## is returned), so without the flag the caller would wrongly see
		## FAILED_CLEAN.
		if had_original:
			var restored := (
				FileAccess.file_exists(backup_path)
				and DirAccess.copy_absolute(backup_path, target_path) == OK
			)
			if restored:
				DirAccess.remove_absolute(backup_path)
			else:
				_restore_failed = true
		print("MCP | update extract failed: could not replace %s" % target_path)
		return {}

	return {
		"target_path": target_path,
		"backup_path": backup_path,
		"had_original": had_original,
	}
## Undo every file recorded in `_paths_written`: files that replaced an
## original are restored from their backup, files that were new are deleted.
## Records are walked newest-first so a target is restored before an earlier
## write of the same path could bring back a stale value. `_paths_written`
## is cleared afterwards. Returns FAILED_CLEAN when every record was undone
## and no in-flight `_install_zip_file` left a target stranded
## (`_restore_failed`); FAILED_MIXED otherwise. Callers MUST NOT re-enable
## the plugin in the MIXED case.
func _rollback_paths_written() -> int:
	var rollback_ok := true
	for idx in range(_paths_written.size() - 1, -1, -1):
		var entry = _paths_written[idx]
		var target := String(entry.get("target_path", ""))
		var backup := String(entry.get("backup_path", ""))
		var had_original := bool(entry.get("had_original", false))
		if not had_original:
			## New in this update: removing it restores the vN state.
			if FileAccess.file_exists(target) and DirAccess.remove_absolute(target) != OK:
				print("MCP | update rollback failed: could not delete %s" % target)
				rollback_ok = false
			continue
		if not FileAccess.file_exists(backup):
			print("MCP | update rollback failed: backup missing for %s" % target)
			rollback_ok = false
			continue
		DirAccess.remove_absolute(target)
		if DirAccess.copy_absolute(backup, target) == OK:
			DirAccess.remove_absolute(backup)
		else:
			print("MCP | update rollback failed: could not restore %s" % target)
			rollback_ok = false
	_paths_written.clear()
	if _restore_failed or not rollback_ok:
		return InstallStatus.FAILED_MIXED
	return InstallStatus.FAILED_CLEAN
## Called once the combined install has succeeded: delete the backups that
## were taken for replaced files, clear `_paths_written`, and record a
## "success" outcome for deferred telemetry. Backup removal is best-effort —
## a failure here does not harm the new install, it only leaves stray
## *.update_backup files for the user to clean up.
func _finalize_install_success() -> void:
	for entry in _paths_written:
		if not entry.get("had_original", false):
			continue
		var stale_backup := String(entry.get("backup_path", ""))
		DirAccess.remove_absolute(stale_backup)
	_paths_written.clear()
	_record_pending_self_update({"status": "success"})
## Store `data` as a JSON string under PENDING_SELF_UPDATE_TELEMETRY_KEY in
## the editor settings, so the re-enabled plugin can emit the self_update
## event once its WebSocket is connected. This bridges the disable -> enable
## window in which the runner cannot send anything itself. Does nothing when
## editor settings are unavailable.
func _record_pending_self_update(data: Dictionary) -> void:
	var editor_settings := EditorInterface.get_editor_settings()
	if editor_settings != null:
		var encoded := JSON.stringify(data)
		editor_settings.set_setting(PENDING_SELF_UPDATE_TELEMETRY_KEY, encoded)
## Remove the downloaded zip, then the temp directory. Return codes are
## ignored, so this is best-effort.
func _cleanup_update_temp() -> void:
	var zip_abs := ProjectSettings.globalize_path(_zip_path)
	DirAccess.remove_absolute(zip_abs)
	var temp_dir_abs := ProjectSettings.globalize_path(_temp_dir)
	DirAccess.remove_absolute(temp_dir_abs)
## `filesystem_changed` listener connected by `_start_filesystem_scan`;
## settles the pending scan wait.
func _on_filesystem_changed() -> void:
	_finish_scan_wait()
## Settle the current scan wait: stop the watchdog and defer the step stored
## in `_scan_next_step` (falling back to "_enable_new_plugin" when it is
## empty). A no-op unless a scan wait is in progress, so the signal handler
## and the watchdog can both call it safely.
func _finish_scan_wait() -> void:
	if _waiting_for_scan:
		_waiting_for_scan = false
		_stop_scan_watchdog()
		var resume_step := "_enable_new_plugin" if _scan_next_step.is_empty() else _scan_next_step
		_scan_next_step = ""
		set_process(false)
		call_deferred(resume_step)
## Enable the plugin again after the install, then wait
## POST_ENABLE_FREE_FRAMES frames before `_cleanup_and_finish`.
func _enable_new_plugin() -> void:
	print("MCP | update runner enabling new plugin")
	EditorInterface.set_plugin_enabled(PLUGIN_CFG_PATH, true)
	_wait_frames(POST_ENABLE_FREE_FRAMES, "_cleanup_and_finish")
## Final step of every path: free the detached dock (if any), then queue
## this runner node for deletion.
func _cleanup_and_finish() -> void:
	_cleanup_detached_dock()
	queue_free()
## Queue the detached dock for deletion when it is still a valid instance,
## and drop the reference either way.
func _cleanup_detached_dock() -> void:
	var dock = _detached_dock
	_detached_dock = null
	if dock != null and is_instance_valid(dock):
		dock.queue_free()