retention.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. import os
  2. import re
  3. from datetime import datetime, timedelta
  4. def apply_retention(job, new_archive_name, backup_dir):
  5. """Applique la politique de rétention après une sauvegarde réussie."""
  6. import json as _json
  7. archives = _list_archives_for_job(job, backup_dir)
  8. if job.retention_mode == "count":
  9. to_delete = _retention_count(archives, job.retention_value)
  10. elif job.retention_mode == "daily":
  11. to_delete = _retention_daily(archives, job.retention_value)
  12. elif job.retention_mode == "gfs":
  13. cfg = _json.loads(job.retention_gfs_config or "{}") if job.retention_gfs_config else {}
  14. to_delete = _retention_gfs(archives, cfg)
  15. else:
  16. return []
  17. from jobs.utils import sudo_rm
  18. deleted = []
  19. for archive_filename in to_delete:
  20. base = os.path.splitext(archive_filename)[0]
  21. for ext in (".tar", ".info.json"):
  22. full = os.path.join(backup_dir, base + ext)
  23. sudo_rm(full)
  24. deleted.append(base + ext)
  25. return deleted
  26. def _job_archive_prefix(job, instance_name):
  27. """Retourne le préfixe des archives pour ce job (ex: jerry_nextcloud_)."""
  28. if job.type == "ynh_app":
  29. import json
  30. cfg = json.loads(job.config_json or "{}")
  31. return f"{instance_name}_{cfg.get('app_id', '')}_"
  32. elif job.type == "ynh_system":
  33. return f"{instance_name}_system_"
  34. elif job.type in ("mysql", "postgresql"):
  35. import json
  36. cfg = json.loads(job.config_json or "{}")
  37. return f"{instance_name}_{job.type}_{cfg.get('database', '')}_"
  38. elif job.type == "custom_dir":
  39. label = re.sub(r'[^a-z0-9]+', '-', job.name.lower().strip()).strip('-')
  40. return f"{instance_name}_{label}_"
  41. else:
  42. return f"{instance_name}_{job.name.lower().replace(' ', '-')}_"
  43. def _list_archives_for_job(job, backup_dir):
  44. """Liste les archives correspondant à ce job, triées par date (plus ancienne en premier)."""
  45. from flask import current_app
  46. instance = current_app.config["INSTANCE_NAME"]
  47. prefix = _job_archive_prefix(job, instance)
  48. from jobs.utils import sudo_listdir
  49. archives = [
  50. fname for fname in sudo_listdir(backup_dir)
  51. if fname.startswith(prefix) and fname.endswith(".tar")
  52. ]
  53. archives.sort(key=_extract_date)
  54. return archives
  55. def apply_remote_retention(job, client):
  56. """Applique la rétention sur l'instance distante après un push.
  57. Filtre les archives par le même préfixe que le job local et applique
  58. la même politique (count/daily). Ne touche pas aux archives des autres jobs.
  59. """
  60. from flask import current_app
  61. instance = current_app.config["INSTANCE_NAME"]
  62. prefix = _job_archive_prefix(job, instance)
  63. try:
  64. remote_archives = client.get_archives()
  65. except Exception:
  66. return []
  67. matching = sorted(
  68. [a["name"] + ".tar" for a in remote_archives if a["name"].startswith(prefix)],
  69. key=_extract_date,
  70. )
  71. if job.retention_mode == "count":
  72. to_delete = _retention_count(matching, job.retention_value)
  73. elif job.retention_mode == "daily":
  74. to_delete = _retention_daily(matching, job.retention_value)
  75. elif job.retention_mode == "gfs":
  76. import json as _json
  77. cfg = _json.loads(job.retention_gfs_config or "{}") if job.retention_gfs_config else {}
  78. to_delete = _retention_gfs(matching, cfg)
  79. else:
  80. return []
  81. deleted = []
  82. for archive_filename in to_delete:
  83. base = os.path.splitext(archive_filename)[0]
  84. try:
  85. client.delete_archive(base)
  86. deleted.append(base)
  87. except Exception:
  88. pass
  89. return deleted
  90. def _extract_date(filename):
  91. match = re.search(r'(\d{8})', filename)
  92. if match:
  93. try:
  94. return datetime.strptime(match.group(1), "%Y%m%d")
  95. except ValueError:
  96. pass
  97. return datetime.min
  98. def _retention_count(archives, keep_n):
  99. if len(archives) <= keep_n:
  100. return []
  101. return archives[: len(archives) - keep_n]
  102. def _retention_daily(archives, days):
  103. cutoff = datetime.utcnow() - timedelta(days=days)
  104. to_delete = []
  105. seen_dates = set()
  106. for archive in reversed(archives):
  107. date = _extract_date(archive)
  108. if date < cutoff:
  109. to_delete.append(archive)
  110. continue
  111. date_key = date.date()
  112. if date_key in seen_dates:
  113. to_delete.append(archive)
  114. else:
  115. seen_dates.add(date_key)
  116. return to_delete
  117. def _retention_gfs(archives, config):
  118. """Politique Grandfather-Father-Son.
  119. config: {"daily": N, "weekly": M, "monthly": P}
  120. - Fils (daily) : conserve les N archives les plus récentes
  121. - Père (weekly) : conserve 1 archive par semaine sur M semaines
  122. - Grand-Père (monthly): conserve 1 archive par mois sur P mois
  123. Une archive peut satisfaire plusieurs catégories simultanément.
  124. """
  125. daily_keep = int(config.get("daily", 7))
  126. weekly_keep = int(config.get("weekly", 4))
  127. monthly_keep = int(config.get("monthly", 12))
  128. dated = []
  129. for archive in archives:
  130. d = _extract_date(archive)
  131. if d != datetime.min:
  132. dated.append((d, archive))
  133. if not dated:
  134. return []
  135. # Trier du plus récent au plus ancien
  136. dated.sort(key=lambda x: x[0], reverse=True)
  137. keepers = set()
  138. # Fils : N archives les plus récentes
  139. for _, archive in dated[:daily_keep]:
  140. keepers.add(archive)
  141. # Père : 1 archive par semaine (la plus récente de chaque semaine), M semaines
  142. seen_weeks = {}
  143. for d, archive in dated:
  144. wk = (d.isocalendar()[0], d.isocalendar()[1])
  145. if wk not in seen_weeks:
  146. seen_weeks[wk] = archive # premier = plus récent de la semaine
  147. for wk in sorted(seen_weeks, reverse=True)[:weekly_keep]:
  148. keepers.add(seen_weeks[wk])
  149. # Grand-Père : 1 archive par mois (la plus récente du mois), P mois
  150. seen_months = {}
  151. for d, archive in dated:
  152. mk = (d.year, d.month)
  153. if mk not in seen_months:
  154. seen_months[mk] = archive
  155. for mk in sorted(seen_months, reverse=True)[:monthly_keep]:
  156. keepers.add(seen_months[mk])
  157. return [archive for _, archive in dated if archive not in keepers]