git.proxmox.com Git - proxmox-offline-mirror.git/blob - src/mirror.rs
fix #4259: mirror: add ignore-errors option
use std::{
    cmp::max,
    collections::HashMap,
    io::Read,
    path::{Path, PathBuf},
};

use anyhow::{bail, format_err, Error};
use flate2::bufread::GzDecoder;
use nix::libc;
use proxmox_http::{client::sync::Client, HttpClient, HttpOptions};
use proxmox_sys::fs::file_get_contents;

use crate::{
    config::{MirrorConfig, SubscriptionKey},
    convert_repo_line,
    pool::Pool,
    types::{Snapshot, SNAPSHOT_REGEX},
    FetchResult, Progress,
};
use proxmox_apt::{
    deb822::{
        CheckSums, CompressionType, FileReference, FileReferenceType, PackagesFile, ReleaseFile,
    },
    repositories::{APTRepository, APTRepositoryPackageType},
};

use crate::helpers;

fn mirror_dir(config: &MirrorConfig) -> String {
    format!("{}/{}", config.base_dir, config.id)
}

pub(crate) fn pool(config: &MirrorConfig) -> Result<Pool, Error> {
    let pool_dir = format!("{}/.pool", config.base_dir);
    Pool::open(Path::new(&mirror_dir(config)), Path::new(&pool_dir))
}
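
// Layout note: the shared, checksum-addressed file pool lives at
// `{base_dir}/.pool`, while `{base_dir}/{id}` holds the per-mirror snapshot
// directories, whose entries are hard links into that pool (see
// `create_snapshot` below).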

/// `MirrorConfig`, but some fields converted/parsed into usable types.
struct ParsedMirrorConfig {
    pub repository: APTRepository,
    pub architectures: Vec<String>,
    pub pool: Pool,
    pub key: Vec<u8>,
    pub verify: bool,
    pub sync: bool,
    pub auth: Option<String>,
    pub client: Client,
    pub ignore_errors: bool,
}

impl TryInto<ParsedMirrorConfig> for MirrorConfig {
    type Error = anyhow::Error;

    fn try_into(self) -> Result<ParsedMirrorConfig, Self::Error> {
        let pool = pool(&self)?;

        let repository = convert_repo_line(self.repository.clone())?;

        let key = file_get_contents(Path::new(&self.key_path))?;

        let options = HttpOptions {
            user_agent: Some("proxmox-offline-mirror 0.1".to_string()),
            ..Default::default()
        }; // TODO actually read version ;)

        let client = Client::new(options);

        Ok(ParsedMirrorConfig {
            repository,
            architectures: self.architectures,
            pool,
            key,
            verify: self.verify,
            sync: self.sync,
            auth: None,
            client,
            ignore_errors: self.ignore_errors,
        })
    }
}

// Helper to get absolute URL for dist-specific relative `path`.
fn get_dist_url(repo: &APTRepository, path: &str) -> String {
    let dist_root = format!("{}/dists/{}", repo.uris[0], repo.suites[0]);

    format!("{}/{}", dist_root, path)
}

// Helper to get dist-specific path given a `prefix` (snapshot dir) and relative `path`.
fn get_dist_path(repo: &APTRepository, prefix: &Path, path: &str) -> PathBuf {
    let mut base = PathBuf::from(prefix);
    base.push("dists");
    base.push(&repo.suites[0]);
    base.push(path);
    base
}

// Helper to get generic URL given a `repo` and `path`.
fn get_repo_url(repo: &APTRepository, path: &str) -> String {
    format!("{}/{}", repo.uris[0], path)
}
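
// Illustrative sketch (hypothetical repository line, not taken from the
// original source): for `deb https://example.org/debian bookworm main`, the
// helpers above produce roughly:
//
//   get_dist_url(&repo, "InRelease")
//       => "https://example.org/debian/dists/bookworm/InRelease"
//   get_dist_path(&repo, Path::new("snapshot.tmp"), "main/binary-amd64/Packages.gz")
//       => "snapshot.tmp/dists/bookworm/main/binary-amd64/Packages.gz"
//   get_repo_url(&repo, "pool/main/f/foo/foo_1.0_amd64.deb")
//       => "https://example.org/debian/pool/main/f/foo/foo_1.0_amd64.deb"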

/// Helper to fetch a file from a URI and optionally verify the response's checksums.
///
/// Only fetches and returns data, doesn't store anything anywhere.
fn fetch_repo_file(
    client: &Client,
    uri: &str,
    max_size: usize,
    checksums: Option<&CheckSums>,
    auth: Option<&str>,
) -> Result<FetchResult, Error> {
    println!("-> GET '{}'..", uri);

    let headers = if let Some(auth) = auth {
        let mut map = HashMap::new();
        map.insert("Authorization".to_string(), auth.to_string());
        Some(map)
    } else {
        None
    };

    let response = client.get(uri, headers.as_ref())?;

    let reader: Box<dyn Read> = response.into_body();
    let mut reader = reader.take(max_size as u64);
    let mut data = Vec::new();
    reader.read_to_end(&mut data)?;

    if let Some(checksums) = checksums {
        checksums.verify(&data)?;
    }

    Ok(FetchResult {
        fetched: data.len(),
        data,
    })
}
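
// Sketch (illustrative, `key`/`server_id` are placeholder names): the `auth`
// parameter above is expected to hold a complete `Authorization` header value.
// For subscription-gated repositories, `create_snapshot` below assembles it
// roughly as:
//
//   let auth = format!("basic {}", base64::encode(format!("{key}:{server_id}")));
//   fetch_repo_file(&client, url, max_size, None, Some(&auth))?;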

/// Helper to fetch InRelease (`detached` == false) or Release/Release.gpg (`detached` == true) files from the repository.
///
/// Verifies the contained/detached signature, stores all fetched files under `prefix`, and returns the verified raw release file data.
fn fetch_release(
    config: &ParsedMirrorConfig,
    prefix: &Path,
    detached: bool,
    dry_run: bool,
) -> Result<FetchResult, Error> {
    let (name, fetched, sig) = if detached {
        println!("Fetching Release/Release.gpg files");
        let sig = fetch_repo_file(
            &config.client,
            &get_dist_url(&config.repository, "Release.gpg"),
            1024 * 1024,
            None,
            config.auth.as_deref(),
        )?;
        let mut fetched = fetch_repo_file(
            &config.client,
            &get_dist_url(&config.repository, "Release"),
            256 * 1024 * 1024,
            None,
            config.auth.as_deref(),
        )?;
        fetched.fetched += sig.fetched;
        ("Release(.gpg)", fetched, Some(sig.data()))
    } else {
        println!("Fetching InRelease file");
        let fetched = fetch_repo_file(
            &config.client,
            &get_dist_url(&config.repository, "InRelease"),
            256 * 1024 * 1024,
            None,
            config.auth.as_deref(),
        )?;
        ("InRelease", fetched, None)
    };

    println!("Verifying '{name}' signature using provided repository key..");
    let content = fetched.data_ref();
    let verified = helpers::verify_signature(content, &config.key, sig.as_deref())?;
    println!("Success");

    let sha512 = Some(openssl::sha::sha512(content));
    let csums = CheckSums {
        sha512,
        ..Default::default()
    };

    if dry_run {
        return Ok(FetchResult {
            data: verified,
            fetched: fetched.fetched,
        });
    }

    let locked = &config.pool.lock()?;

    if !locked.contains(&csums) {
        locked.add_file(content, &csums, config.sync)?;
    }

    if detached {
        locked.link_file(
            &csums,
            Path::new(&get_dist_path(&config.repository, prefix, "Release")),
        )?;
        let sig = sig.unwrap();
        let sha512 = Some(openssl::sha::sha512(&sig));
        let csums = CheckSums {
            sha512,
            ..Default::default()
        };
        if !locked.contains(&csums) {
            locked.add_file(&sig, &csums, config.sync)?;
        }
        locked.link_file(
            &csums,
            Path::new(&get_dist_path(&config.repository, prefix, "Release.gpg")),
        )?;
    } else {
        locked.link_file(
            &csums,
            Path::new(&get_dist_path(&config.repository, prefix, "InRelease")),
        )?;
    }

    Ok(FetchResult {
        data: verified,
        fetched: fetched.fetched,
    })
}
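
// Note: after a successful run the snapshot `prefix` contains either
// `dists/<suite>/InRelease` (`detached` == false) or `dists/<suite>/Release`
// plus `Release.gpg` (`detached` == true), all hard-linked from the pool.
// `create_snapshot` below fetches both variants to stay compatible with
// clients expecting either layout.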

/// Helper to fetch an index file referenced by a `ReleaseFile`.
///
/// Since these usually come in compressed and uncompressed form, with the latter often not actually existing in the source repository as a file, this fetches and, if necessary, decompresses to obtain a copy of the uncompressed data.
/// Will skip fetching if both references are already available with the expected checksum in the pool, in which case they will just be re-linked under the new path.
///
/// Returns the uncompressed data.
fn fetch_index_file(
    config: &ParsedMirrorConfig,
    prefix: &Path,
    reference: &FileReference,
    uncompressed: Option<&FileReference>,
    by_hash: bool,
    dry_run: bool,
) -> Result<FetchResult, Error> {
    let url = get_dist_url(&config.repository, &reference.path);
    let path = get_dist_path(&config.repository, prefix, &reference.path);

    if let Some(uncompressed) = uncompressed {
        let uncompressed_path = get_dist_path(&config.repository, prefix, &uncompressed.path);

        if config.pool.contains(&reference.checksums)
            && config.pool.contains(&uncompressed.checksums)
        {
            let data = config
                .pool
                .get_contents(&uncompressed.checksums, config.verify)?;

            if dry_run {
                return Ok(FetchResult { data, fetched: 0 });
            }
            // Ensure they're linked at current path
            config.pool.lock()?.link_file(&reference.checksums, &path)?;
            config
                .pool
                .lock()?
                .link_file(&uncompressed.checksums, &uncompressed_path)?;
            return Ok(FetchResult { data, fetched: 0 });
        }
    }

    let urls = if by_hash {
        let mut urls = Vec::new();
        if let Some((base_url, _file_name)) = url.rsplit_once('/') {
            if let Some(sha512) = reference.checksums.sha512 {
                urls.push(format!("{base_url}/by-hash/SHA512/{}", hex::encode(sha512)));
            }
            if let Some(sha256) = reference.checksums.sha256 {
                urls.push(format!("{base_url}/by-hash/SHA256/{}", hex::encode(sha256)));
            }
        }
        urls.push(url);
        urls
    } else {
        vec![url]
    };

    let res = urls
        .iter()
        .fold(None, |res, url| match res {
            Some(Ok(res)) => Some(Ok(res)),
            _ => Some(fetch_plain_file(
                config,
                url,
                &path,
                reference.size,
                &reference.checksums,
                true,
                dry_run,
            )),
        })
        .ok_or_else(|| format_err!("Failed to retrieve {}", reference.path))??;

    let mut buf = Vec::new();
    let raw = res.data_ref();

    let decompressed = match reference.file_type.compression() {
        None => raw,
        Some(CompressionType::Gzip) => {
            let mut gz = GzDecoder::new(raw);
            gz.read_to_end(&mut buf)?;
            &buf[..]
        }
        Some(CompressionType::Bzip2) => {
            let mut bz = bzip2::read::BzDecoder::new(raw);
            bz.read_to_end(&mut buf)?;
            &buf[..]
        }
        Some(CompressionType::Lzma) | Some(CompressionType::Xz) => {
            let mut xz = xz2::read::XzDecoder::new_multi_decoder(raw);
            xz.read_to_end(&mut buf)?;
            &buf[..]
        }
    };
    let res = FetchResult {
        data: decompressed.to_owned(),
        fetched: res.fetched,
    };

    if dry_run {
        return Ok(res);
    }

    let locked = &config.pool.lock()?;
    if let Some(uncompressed) = uncompressed {
        if !locked.contains(&uncompressed.checksums) {
            locked.add_file(decompressed, &uncompressed.checksums, config.sync)?;
        }

        // Ensure it's linked at current path
        let uncompressed_path = get_dist_path(&config.repository, prefix, &uncompressed.path);
        locked.link_file(&uncompressed.checksums, &uncompressed_path)?;
    }

    Ok(res)
}
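
// Minimal standalone sketch of the decompression fan-out above, shown for the
// gzip case only (the bzip2 and xz branches work the same way with their
// respective decoders); not part of the original source:
//
//   use flate2::bufread::GzDecoder;
//   use std::io::Read;
//
//   fn gunzip(raw: &[u8]) -> Result<Vec<u8>, std::io::Error> {
//       let mut buf = Vec::new();
//       GzDecoder::new(raw).read_to_end(&mut buf)?;
//       Ok(buf)
//   }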

/// Helper to fetch arbitrary files like binary packages.
///
/// Will skip fetching if a matching file already exists locally, in which case it will just be re-linked under the new path.
///
/// If `need_data` is false and the mirror config is set to skip verification, reading the file's contents is skipped as well whenever fetching was skipped.
fn fetch_plain_file(
    config: &ParsedMirrorConfig,
    url: &str,
    file: &Path,
    max_size: usize,
    checksums: &CheckSums,
    need_data: bool,
    dry_run: bool,
) -> Result<FetchResult, Error> {
    let locked = &config.pool.lock()?;
    let res = if locked.contains(checksums) {
        if need_data || config.verify {
            locked
                .get_contents(checksums, config.verify)
                .map(|data| FetchResult { data, fetched: 0 })?
        } else {
            // performance optimization for .deb files if verify is false
            // we never need the file contents and they make up the bulk of a repo
            FetchResult {
                data: vec![],
                fetched: 0,
            }
        }
    } else if dry_run && !need_data {
        FetchResult {
            data: vec![],
            fetched: 0,
        }
    } else {
        let fetched = fetch_repo_file(
            &config.client,
            url,
            max_size,
            Some(checksums),
            config.auth.as_deref(),
        )?;
        locked.add_file(fetched.data_ref(), checksums, config.sync)?;
        fetched
    };

    if !dry_run {
        // Ensure it's linked at current path
        locked.link_file(checksums, file)?;
    }

    Ok(res)
}
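
// Hypothetical call sketch (variable names are illustrative): fetching a
// single package into a snapshot prefix without keeping its contents in
// memory, outside of a dry-run. This mirrors the call in `create_snapshot`:
//
//   let res = fetch_plain_file(
//       &config,
//       &get_repo_url(&config.repository, &package.file),
//       &full_path,          // prefix + package.file
//       package.size,        // expected size from the package index
//       &package.checksums,  // checksums from the package index
//       false,               // need_data: contents not needed by the caller
//       false,               // dry_run
//   )?;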

/// Initialize a new mirror (by creating the corresponding pool).
pub fn init(config: &MirrorConfig) -> Result<(), Error> {
    let pool_dir = format!("{}/.pool", config.base_dir);

    let dir = format!("{}/{}", config.base_dir, config.id);

    Pool::create(Path::new(&dir), Path::new(&pool_dir))?;
    Ok(())
}

/// Destroy a mirror (by destroying the corresponding pool's link dir followed by GC).
pub fn destroy(config: &MirrorConfig) -> Result<(), Error> {
    let pool: Pool = pool(config)?;
    pool.lock()?.destroy()?;

    Ok(())
}

/// List snapshots.
pub fn list_snapshots(config: &MirrorConfig) -> Result<Vec<Snapshot>, Error> {
    let _pool: Pool = pool(config)?;

    let mut list: Vec<Snapshot> = vec![];

    let dir = mirror_dir(config);

    let path = Path::new(&dir);

    proxmox_sys::fs::scandir(
        libc::AT_FDCWD,
        path,
        &SNAPSHOT_REGEX,
        |_l2_fd, snapshot, file_type| {
            if file_type != nix::dir::Type::Directory {
                return Ok(());
            }

            list.push(snapshot.parse()?);

            Ok(())
        },
    )?;

    list.sort_unstable();

    Ok(list)
}

/// Create a new snapshot of the remote repository, fetching and storing files as needed.
///
/// Operates in three phases:
/// - Fetch and verify release files
/// - Fetch referenced indices according to config
/// - Fetch binary packages referenced by package indices
///
/// Files will be linked in a temporary directory and only renamed to the final, valid snapshot directory at the end. In case of error, leftover `XXX.tmp` directories at the top level of `base_dir` can be safely removed once the next snapshot has been successfully created, as they only contain hardlinks.
pub fn create_snapshot(
    config: MirrorConfig,
    snapshot: &Snapshot,
    subscription: Option<SubscriptionKey>,
    dry_run: bool,
) -> Result<(), Error> {
    let auth = if let Some(product) = &config.use_subscription {
        match subscription {
            None => {
                bail!(
                    "Mirror {} requires a subscription key, but none given.",
                    config.id
                );
            }
            Some(key) if key.product() == *product => {
                let base64 = base64::encode(format!("{}:{}", key.key, key.server_id));
                Some(format!("basic {base64}"))
            }
            Some(key) => {
                bail!(
                    "Repository product type '{}' and key product type '{}' don't match.",
                    product,
                    key.product()
                );
            }
        }
    } else {
        None
    };

    let mut config: ParsedMirrorConfig = config.try_into()?;
    config.auth = auth;

    let prefix = format!("{snapshot}.tmp");
    let prefix = Path::new(&prefix);

    let mut total_progress = Progress::new();

    let parse_release = |res: FetchResult, name: &str| -> Result<ReleaseFile, Error> {
        println!("Parsing {name}..");
        let parsed: ReleaseFile = res.data[..].try_into()?;
        println!(
            "'{name}' file has {} referenced files..",
            parsed.files.len()
        );
        Ok(parsed)
    };

    // we want both on-disk for compat reasons
    let res = fetch_release(&config, prefix, true, dry_run)?;
    total_progress.update(&res);
    let _release = parse_release(res, "Release")?;

    let res = fetch_release(&config, prefix, false, dry_run)?;
    total_progress.update(&res);
    let release = parse_release(res, "InRelease")?;

    let mut per_component = HashMap::new();
    let mut others = Vec::new();
    let binary = &config
        .repository
        .types
        .contains(&APTRepositoryPackageType::Deb);
    let source = &config
        .repository
        .types
        .contains(&APTRepositoryPackageType::DebSrc);

    for (basename, references) in &release.files {
        let reference = references.first();
        let reference = if let Some(reference) = reference {
            reference.clone()
        } else {
            continue;
        };
        let skip_components = !&config.repository.components.contains(&reference.component);

        let skip = skip_components
            || match &reference.file_type {
                FileReferenceType::Ignored => true,
                FileReferenceType::PDiff => true, // would require fetching the patches as well
                FileReferenceType::Sources(_) => !source,
                _ => {
                    if let Some(arch) = reference.file_type.architecture() {
                        !binary || !config.architectures.contains(arch)
                    } else {
                        false
                    }
                }
            };
        if skip {
            println!("Skipping {}", reference.path);
            others.push(reference);
        } else {
            let list = per_component
                .entry(reference.component)
                .or_insert_with(Vec::new);
            list.push(basename);
        }
    }
    println!();

    let mut indices_size = 0_usize;
    let mut total_count = 0;

    for (component, references) in &per_component {
        println!("Component '{component}'");

        let mut component_indices_size = 0;

        for basename in references {
            for reference in release.files.get(*basename).unwrap() {
                println!("\t{:?}: {:?}", reference.path, reference.file_type);
                component_indices_size += reference.size;
            }
        }
        indices_size += component_indices_size;

        let component_count = references.len();
        total_count += component_count;

        println!("Component references count: {component_count}");
        println!("Component indices size: {component_indices_size}");
        if references.is_empty() {
            println!("\tNo references found..");
        }
    }
    println!("Total indices count: {total_count}");
    println!("Total indices size: {indices_size}");

    if !others.is_empty() {
        println!("Skipped {} references", others.len());
    }
    println!();

    let mut packages_size = 0_usize;
    let mut packages_indices = HashMap::new();
    let mut failed_references = Vec::new();
    for (component, references) in per_component {
        println!("\nFetching indices for component '{component}'");
        let mut component_deb_size = 0;
        let mut fetch_progress = Progress::new();

        for basename in references {
            println!("\tFetching '{basename}'..");
            let files = release.files.get(basename).unwrap();
            let uncompressed_ref = files.iter().find(|reference| reference.path == *basename);

            let mut package_index_data = None;

            for reference in files {
                // if both compressed and uncompressed are referenced, the uncompressed file may not exist on the server
                if Some(reference) == uncompressed_ref && files.len() > 1 {
                    continue;
                }

                // this will ensure the uncompressed file will be written locally
                let res = match fetch_index_file(
                    &config,
                    prefix,
                    reference,
                    uncompressed_ref,
                    release.aquire_by_hash,
                    dry_run,
                ) {
                    Ok(res) => res,
                    Err(err) if !reference.file_type.is_package_index() => {
                        eprintln!(
                            "Failed to fetch '{:?}' type reference '{}', skipping - {err}",
                            reference.file_type, reference.path
                        );
                        failed_references.push(reference);
                        continue;
                    }
                    Err(err) => bail!(err),
                };
                fetch_progress.update(&res);

                if package_index_data.is_none() && reference.file_type.is_package_index() {
                    package_index_data = Some(res.data());
                }
            }
            if let Some(data) = package_index_data {
                let packages: PackagesFile = data[..].try_into()?;
                let size: usize = packages.files.iter().map(|p| p.size).sum();
                println!("\t{} packages totalling {size}", packages.files.len());
                component_deb_size += size;

                packages_indices.entry(basename).or_insert(packages);
            }
            println!("Progress: {fetch_progress}");
        }
        println!("Total deb size for component: {component_deb_size}");
        packages_size += component_deb_size;
        total_progress += fetch_progress;
    }
    println!("Total deb size: {packages_size}");
    if !failed_references.is_empty() {
        eprintln!("Failed to download non-package-index references:");
        for reference in failed_references {
            eprintln!("\t{}", reference.path);
        }
    }

    println!("\nFetching packages..");
    let mut dry_run_progress = Progress::new();
    for (basename, references) in packages_indices {
        let total_files = references.files.len();
        if total_files == 0 {
            println!("\n{basename} - no files, skipping.");
            continue;
        } else {
            println!("\n{basename} - {total_files} total file(s)");
        }

        let mut fetch_progress = Progress::new();
        for package in references.files {
            let url = get_repo_url(&config.repository, &package.file);

            if dry_run {
                if config.pool.contains(&package.checksums) {
                    fetch_progress.update(&FetchResult {
                        data: vec![],
                        fetched: 0,
                    });
                } else {
                    println!("\t(dry-run) GET missing '{url}' ({}b)", package.size);
                    fetch_progress.update(&FetchResult {
                        data: vec![],
                        fetched: package.size,
                    });
                }
            } else {
                let mut full_path = PathBuf::from(prefix);
                full_path.push(&package.file);

                match fetch_plain_file(
                    &config,
                    &url,
                    &full_path,
                    package.size,
                    &package.checksums,
                    false,
                    dry_run,
                ) {
                    Ok(res) => fetch_progress.update(&res),
                    Err(err) if config.ignore_errors => {
                        let msg = format!(
                            "{}: failed to fetch package '{}' - {}",
                            basename, package.file, err,
                        );
                        eprintln!("{msg}");
                    }
                    res => {
                        res?;
                    }
                }
            }

            if fetch_progress.file_count() % (max(total_files / 100, 1)) == 0 {
                println!("\tProgress: {fetch_progress}");
            }
        }
        println!("\tProgress: {fetch_progress}");
        if dry_run {
            dry_run_progress += fetch_progress;
        } else {
            total_progress += fetch_progress;
        }
    }

    if dry_run {
        println!("\nDry-run Stats (indices, downloaded but not persisted):\n{total_progress}");
        println!("\nDry-run stats (packages, new == missing):\n{dry_run_progress}");
    } else {
        println!("\nStats: {total_progress}");
    }

    if !dry_run {
        println!("Rotating temp. snapshot in-place: {prefix:?} -> \"{snapshot}\"");
        let locked = config.pool.lock()?;
        locked.rename(prefix, Path::new(&format!("{snapshot}")))?;
    }

    Ok(())
}
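
// Note: with the mirror's ignore-errors option set (`config.ignore_errors`
// above), failures while fetching individual packages are reported on stderr
// and skipped instead of aborting the snapshot. Failures on package indices
// still abort; failed non-package-index references are collected and listed
// at the end of the index phase.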

/// Remove a snapshot by removing the corresponding snapshot directory. To actually free up space, a garbage collection needs to be run afterwards.
pub fn remove_snapshot(config: &MirrorConfig, snapshot: &Snapshot) -> Result<(), Error> {
    let pool: Pool = pool(config)?;
    let path = pool.get_path(Path::new(&snapshot.to_string()))?;

    pool.lock()?.remove_dir(&path)
}

/// Run a garbage collection on the underlying pool.
pub fn gc(config: &MirrorConfig) -> Result<(usize, u64), Error> {
    let pool: Pool = pool(config)?;

    pool.lock()?.gc()
}
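
// Hypothetical end-to-end usage sketch of this module's public API (the
// snapshot literal and config loading are illustrative; `Snapshot` is assumed
// to implement `FromStr`/`Display` as its use above suggests):
//
//   let config: MirrorConfig = /* loaded from the offline-mirror config file */;
//   init(&config)?;
//   for old in list_snapshots(&config)? {
//       println!("existing snapshot: {old}");
//   }
//   let snapshot: Snapshot = "2023-01-01T00:00:00Z".parse()?;
//   create_snapshot(config, &snapshot, None, false)?; // consumes `config`, not a dry-run
//   // later: remove_snapshot() and gc() free up space again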