git.proxmox.com Git - proxmox-backup.git/blob - src/client/backup_writer.rs
client writer: do not output chunklist for now on verbose true
[proxmox-backup.git] / src / client / backup_writer.rs
1 use std::collections::HashSet;
2 use std::os::unix::fs::OpenOptionsExt;
3 use std::sync::atomic::{AtomicUsize, Ordering};
4 use std::sync::{Arc, Mutex};
5
6 use anyhow::{bail, format_err, Error};
7 use chrono::{DateTime, Utc};
8 use futures::*;
9 use futures::stream::Stream;
10 use futures::future::AbortHandle;
11 use serde_json::{json, Value};
12 use tokio::io::AsyncReadExt;
13 use tokio::sync::{mpsc, oneshot};
14
15 use proxmox::tools::digest_to_hex;
16
17 use super::merge_known_chunks::{MergedChunkInfo, MergeKnownChunks};
18 use crate::backup::*;
19 use crate::tools::format::HumanByte;
20
21 use super::{HttpClient, H2Client};
22
23 pub struct BackupWriter {
24 h2: H2Client,
25 abort: AbortHandle,
26 verbose: bool,
27 crypt_config: Option<Arc<CryptConfig>>,
28 }
29
30 impl Drop for BackupWriter {
31
32 fn drop(&mut self) {
33 self.abort.abort();
34 }
35 }
36
/// Result of a single upload (blob or chunked stream).
pub struct BackupStats {
    /// Number of bytes accounted for by the upload.
    pub size: u64,
    /// SHA-256 checksum reported for the upload (raw blob data for blobs,
    /// index checksum for chunked streams).
    pub csum: [u8; 32],
}
41
42 impl BackupWriter {
43
44 fn new(h2: H2Client, abort: AbortHandle, crypt_config: Option<Arc<CryptConfig>>, verbose: bool) -> Arc<Self> {
45 Arc::new(Self { h2, abort, crypt_config, verbose })
46 }
47
48 pub async fn start(
49 client: HttpClient,
50 crypt_config: Option<Arc<CryptConfig>>,
51 datastore: &str,
52 backup_type: &str,
53 backup_id: &str,
54 backup_time: DateTime<Utc>,
55 debug: bool,
56 ) -> Result<Arc<BackupWriter>, Error> {
57
58 let param = json!({
59 "backup-type": backup_type,
60 "backup-id": backup_id,
61 "backup-time": backup_time.timestamp(),
62 "store": datastore,
63 "debug": debug
64 });
65
66 let req = HttpClient::request_builder(
67 client.server(), "GET", "/api2/json/backup", Some(param)).unwrap();
68
69 let (h2, abort) = client.start_h2_connection(req, String::from(PROXMOX_BACKUP_PROTOCOL_ID_V1!())).await?;
70
71 Ok(BackupWriter::new(h2, abort, crypt_config, debug))
72 }
73
74 pub async fn get(
75 &self,
76 path: &str,
77 param: Option<Value>,
78 ) -> Result<Value, Error> {
79 self.h2.get(path, param).await
80 }
81
82 pub async fn put(
83 &self,
84 path: &str,
85 param: Option<Value>,
86 ) -> Result<Value, Error> {
87 self.h2.put(path, param).await
88 }
89
90 pub async fn post(
91 &self,
92 path: &str,
93 param: Option<Value>,
94 ) -> Result<Value, Error> {
95 self.h2.post(path, param).await
96 }
97
98 pub async fn upload_post(
99 &self,
100 path: &str,
101 param: Option<Value>,
102 content_type: &str,
103 data: Vec<u8>,
104 ) -> Result<Value, Error> {
105 self.h2.upload("POST", path, param, content_type, data).await
106 }
107
108 pub async fn send_upload_request(
109 &self,
110 method: &str,
111 path: &str,
112 param: Option<Value>,
113 content_type: &str,
114 data: Vec<u8>,
115 ) -> Result<h2::client::ResponseFuture, Error> {
116
117 let request = H2Client::request_builder("localhost", method, path, param, Some(content_type)).unwrap();
118 let response_future = self.h2.send_request(request, Some(bytes::Bytes::from(data.clone()))).await?;
119 Ok(response_future)
120 }
121
122 pub async fn upload_put(
123 &self,
124 path: &str,
125 param: Option<Value>,
126 content_type: &str,
127 data: Vec<u8>,
128 ) -> Result<Value, Error> {
129 self.h2.upload("PUT", path, param, content_type, data).await
130 }
131
132 pub async fn finish(self: Arc<Self>) -> Result<(), Error> {
133 let h2 = self.h2.clone();
134
135 h2.post("finish", None)
136 .map_ok(move |_| {
137 self.abort.abort();
138 })
139 .await
140 }
141
142 pub fn cancel(&self) {
143 self.abort.abort();
144 }
145
146 pub async fn upload_blob<R: std::io::Read>(
147 &self,
148 mut reader: R,
149 file_name: &str,
150 ) -> Result<BackupStats, Error> {
151 let mut raw_data = Vec::new();
152 // fixme: avoid loading into memory
153 reader.read_to_end(&mut raw_data)?;
154
155 let csum = openssl::sha::sha256(&raw_data);
156 let param = json!({"encoded-size": raw_data.len(), "file-name": file_name });
157 let size = raw_data.len() as u64;
158 let _value = self.h2.upload("POST", "blob", Some(param), "application/octet-stream", raw_data).await?;
159 Ok(BackupStats { size, csum })
160 }
161
162 pub async fn upload_blob_from_data(
163 &self,
164 data: Vec<u8>,
165 file_name: &str,
166 compress: bool,
167 encrypt: bool,
168 ) -> Result<BackupStats, Error> {
169 let blob = match (encrypt, &self.crypt_config) {
170 (false, _) => DataBlob::encode(&data, None, compress)?,
171 (true, None) => bail!("requested encryption without a crypt config"),
172 (true, Some(crypt_config)) => DataBlob::encode(&data, Some(crypt_config), compress)?,
173 };
174
175 let raw_data = blob.into_inner();
176 let size = raw_data.len() as u64;
177
178 let csum = openssl::sha::sha256(&raw_data);
179 let param = json!({"encoded-size": size, "file-name": file_name });
180 let _value = self.h2.upload("POST", "blob", Some(param), "application/octet-stream", raw_data).await?;
181 Ok(BackupStats { size, csum })
182 }
183
184 pub async fn upload_blob_from_file<P: AsRef<std::path::Path>>(
185 &self,
186 src_path: P,
187 file_name: &str,
188 compress: bool,
189 encrypt: bool,
190 ) -> Result<BackupStats, Error> {
191
192 let src_path = src_path.as_ref();
193
194 let mut file = tokio::fs::File::open(src_path)
195 .await
196 .map_err(|err| format_err!("unable to open file {:?} - {}", src_path, err))?;
197
198 let mut contents = Vec::new();
199
200 file.read_to_end(&mut contents)
201 .await
202 .map_err(|err| format_err!("unable to read file {:?} - {}", src_path, err))?;
203
204 self.upload_blob_from_data(contents, file_name, compress, encrypt).await
205 }
206
207 pub async fn upload_stream(
208 &self,
209 previous_manifest: Option<Arc<BackupManifest>>,
210 archive_name: &str,
211 stream: impl Stream<Item = Result<bytes::BytesMut, Error>>,
212 prefix: &str,
213 fixed_size: Option<u64>,
214 compress: bool,
215 encrypt: bool,
216 ) -> Result<BackupStats, Error> {
217 let known_chunks = Arc::new(Mutex::new(HashSet::new()));
218
219 let mut param = json!({ "archive-name": archive_name });
220 if let Some(size) = fixed_size {
221 param["size"] = size.into();
222 }
223
224 if encrypt && self.crypt_config.is_none() {
225 bail!("requested encryption without a crypt config");
226 }
227
228 let index_path = format!("{}_index", prefix);
229 let close_path = format!("{}_close", prefix);
230
231 if let Some(manifest) = previous_manifest {
232 // try, but ignore errors
233 match archive_type(archive_name) {
234 Ok(ArchiveType::FixedIndex) => {
235 let _ = self.download_previous_fixed_index(archive_name, &manifest, known_chunks.clone()).await;
236 }
237 Ok(ArchiveType::DynamicIndex) => {
238 let _ = self.download_previous_dynamic_index(archive_name, &manifest, known_chunks.clone()).await;
239 }
240 _ => { /* do nothing */ }
241 }
242 }
243
244 let wid = self.h2.post(&index_path, Some(param)).await?.as_u64().unwrap();
245
246 let (chunk_count, chunk_reused, size, size_reused, duration, csum) =
247 Self::upload_chunk_info_stream(
248 self.h2.clone(),
249 wid,
250 stream,
251 &prefix,
252 known_chunks.clone(),
253 if encrypt { self.crypt_config.clone() } else { None },
254 compress,
255 self.verbose,
256 )
257 .await?;
258
259 let uploaded = size - size_reused;
260 let vsize_h: HumanByte = size.into();
261 let archive = if self.verbose {
262 archive_name.to_string()
263 } else {
264 crate::tools::format::strip_server_file_expenstion(archive_name.clone())
265 };
266 if archive_name != CATALOG_NAME {
267 let speed: HumanByte = (uploaded / (duration.as_secs() as usize)).into();
268 let uploaded: HumanByte = uploaded.into();
269 println!("{}: had to upload {} from {} in {}s, avgerage speed {}/s).", archive, uploaded, vsize_h, duration.as_secs(), speed);
270 } else {
271 println!("Uploaded backup catalog ({})", vsize_h);
272 }
273
274 if size_reused > 0 && size > 1024*1024 {
275 let reused_percent = size_reused as f64 * 100. / size as f64;
276 let reused: HumanByte = size_reused.into();
277 println!("{}: backup was done incrementally, reused {} ({:.1}%)", archive, reused, reused_percent);
278 }
279 if self.verbose && chunk_count > 0 {
280 println!("{}: Reused {} from {} chunks.", archive, chunk_reused, chunk_count);
281 println!("{}: Average chunk size was {}.", archive, HumanByte::from(size/chunk_count));
282 println!("{}: Average time per request: {} microseconds.", archive, (duration.as_micros())/(chunk_count as u128));
283 }
284
285 let param = json!({
286 "wid": wid ,
287 "chunk-count": chunk_count,
288 "size": size,
289 "csum": proxmox::tools::digest_to_hex(&csum),
290 });
291 let _value = self.h2.post(&close_path, Some(param)).await?;
292 Ok(BackupStats {
293 size: size as u64,
294 csum,
295 })
296 }
297
298 fn response_queue(verbose: bool) -> (
299 mpsc::Sender<h2::client::ResponseFuture>,
300 oneshot::Receiver<Result<(), Error>>
301 ) {
302 let (verify_queue_tx, verify_queue_rx) = mpsc::channel(100);
303 let (verify_result_tx, verify_result_rx) = oneshot::channel();
304
305 // FIXME: check if this works as expected as replacement for the combinator below?
306 // tokio::spawn(async move {
307 // let result: Result<(), Error> = (async move {
308 // while let Some(response) = verify_queue_rx.recv().await {
309 // match H2Client::h2api_response(response.await?).await {
310 // Ok(result) => println!("RESPONSE: {:?}", result),
311 // Err(err) => bail!("pipelined request failed: {}", err),
312 // }
313 // }
314 // Ok(())
315 // }).await;
316 // let _ignore_closed_channel = verify_result_tx.send(result);
317 // });
318 // old code for reference?
319 tokio::spawn(
320 verify_queue_rx
321 .map(Ok::<_, Error>)
322 .try_for_each(move |response: h2::client::ResponseFuture| {
323 response
324 .map_err(Error::from)
325 .and_then(H2Client::h2api_response)
326 .map_ok(move |result| if verbose { println!("RESPONSE: {:?}", result) })
327 .map_err(|err| format_err!("pipelined request failed: {}", err))
328 })
329 .map(|result| {
330 let _ignore_closed_channel = verify_result_tx.send(result);
331 })
332 );
333
334 (verify_queue_tx, verify_result_rx)
335 }
336
337 fn append_chunk_queue(h2: H2Client, wid: u64, path: String, verbose: bool) -> (
338 mpsc::Sender<(MergedChunkInfo, Option<h2::client::ResponseFuture>)>,
339 oneshot::Receiver<Result<(), Error>>,
340 ) {
341 let (verify_queue_tx, verify_queue_rx) = mpsc::channel(64);
342 let (verify_result_tx, verify_result_rx) = oneshot::channel();
343
344 let h2_2 = h2.clone();
345
346 // FIXME: async-block-ify this code!
347 tokio::spawn(
348 verify_queue_rx
349 .map(Ok::<_, Error>)
350 .and_then(move |(merged_chunk_info, response): (MergedChunkInfo, Option<h2::client::ResponseFuture>)| {
351 match (response, merged_chunk_info) {
352 (Some(response), MergedChunkInfo::Known(list)) => {
353 future::Either::Left(
354 response
355 .map_err(Error::from)
356 .and_then(H2Client::h2api_response)
357 .and_then(move |_result| {
358 future::ok(MergedChunkInfo::Known(list))
359 })
360 )
361 }
362 (None, MergedChunkInfo::Known(list)) => {
363 future::Either::Right(future::ok(MergedChunkInfo::Known(list)))
364 }
365 _ => unreachable!(),
366 }
367 })
368 .merge_known_chunks()
369 .and_then(move |merged_chunk_info| {
370 match merged_chunk_info {
371 MergedChunkInfo::Known(chunk_list) => {
372 let mut digest_list = vec![];
373 let mut offset_list = vec![];
374 for (offset, digest) in chunk_list {
375 digest_list.push(digest_to_hex(&digest));
376 offset_list.push(offset);
377 }
378 if verbose { println!("append chunks list len ({})", digest_list.len()); }
379 let param = json!({ "wid": wid, "digest-list": digest_list, "offset-list": offset_list });
380 let request = H2Client::request_builder("localhost", "PUT", &path, None, Some("application/json")).unwrap();
381 let param_data = bytes::Bytes::from(param.to_string().into_bytes());
382 let upload_data = Some(param_data);
383 h2_2.send_request(request, upload_data)
384 .and_then(move |response| {
385 response
386 .map_err(Error::from)
387 .and_then(H2Client::h2api_response)
388 .map_ok(|_| ())
389 })
390 .map_err(|err| format_err!("pipelined request failed: {}", err))
391 }
392 _ => unreachable!(),
393 }
394 })
395 .try_for_each(|_| future::ok(()))
396 .map(|result| {
397 let _ignore_closed_channel = verify_result_tx.send(result);
398 })
399 );
400
401 (verify_queue_tx, verify_result_rx)
402 }
403
404 pub async fn download_previous_fixed_index(
405 &self,
406 archive_name: &str,
407 manifest: &BackupManifest,
408 known_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
409 ) -> Result<FixedIndexReader, Error> {
410
411 let mut tmpfile = std::fs::OpenOptions::new()
412 .write(true)
413 .read(true)
414 .custom_flags(libc::O_TMPFILE)
415 .open("/tmp")?;
416
417 let param = json!({ "archive-name": archive_name });
418 self.h2.download("previous", Some(param), &mut tmpfile).await?;
419
420 let index = FixedIndexReader::new(tmpfile)
421 .map_err(|err| format_err!("unable to read fixed index '{}' - {}", archive_name, err))?;
422 // Note: do not use values stored in index (not trusted) - instead, computed them again
423 let (csum, size) = index.compute_csum();
424 manifest.verify_file(archive_name, &csum, size)?;
425
426 // add index chunks to known chunks
427 let mut known_chunks = known_chunks.lock().unwrap();
428 for i in 0..index.index_count() {
429 known_chunks.insert(*index.index_digest(i).unwrap());
430 }
431
432 if self.verbose {
433 println!("{}: known chunks list length is {}", archive_name, index.index_count());
434 }
435
436 Ok(index)
437 }
438
439 pub async fn download_previous_dynamic_index(
440 &self,
441 archive_name: &str,
442 manifest: &BackupManifest,
443 known_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
444 ) -> Result<DynamicIndexReader, Error> {
445
446 let mut tmpfile = std::fs::OpenOptions::new()
447 .write(true)
448 .read(true)
449 .custom_flags(libc::O_TMPFILE)
450 .open("/tmp")?;
451
452 let param = json!({ "archive-name": archive_name });
453 self.h2.download("previous", Some(param), &mut tmpfile).await?;
454
455 let index = DynamicIndexReader::new(tmpfile)
456 .map_err(|err| format_err!("unable to read dynmamic index '{}' - {}", archive_name, err))?;
457 // Note: do not use values stored in index (not trusted) - instead, computed them again
458 let (csum, size) = index.compute_csum();
459 manifest.verify_file(archive_name, &csum, size)?;
460
461 // add index chunks to known chunks
462 let mut known_chunks = known_chunks.lock().unwrap();
463 for i in 0..index.index_count() {
464 known_chunks.insert(*index.index_digest(i).unwrap());
465 }
466
467 if self.verbose {
468 println!("{}: known chunks list length is {}", archive_name, index.index_count());
469 }
470
471 Ok(index)
472 }
473
474 /// Download backup manifest (index.json) of last backup
475 pub async fn download_previous_manifest(&self) -> Result<BackupManifest, Error> {
476
477 let mut raw_data = Vec::with_capacity(64 * 1024);
478
479 let param = json!({ "archive-name": MANIFEST_BLOB_NAME });
480 self.h2.download("previous", Some(param), &mut raw_data).await?;
481
482 let blob = DataBlob::from_raw(raw_data)?;
483 blob.verify_crc()?;
484 let data = blob.decode(self.crypt_config.as_ref().map(Arc::as_ref))?;
485
486 let manifest = BackupManifest::from_data(&data[..], self.crypt_config.as_ref().map(Arc::as_ref))?;
487
488 Ok(manifest)
489 }
490
491 fn upload_chunk_info_stream(
492 h2: H2Client,
493 wid: u64,
494 stream: impl Stream<Item = Result<bytes::BytesMut, Error>>,
495 prefix: &str,
496 known_chunks: Arc<Mutex<HashSet<[u8;32]>>>,
497 crypt_config: Option<Arc<CryptConfig>>,
498 compress: bool,
499 verbose: bool,
500 ) -> impl Future<Output = Result<(usize, usize, usize, usize, std::time::Duration, [u8; 32]), Error>> {
501
502 let total_chunks = Arc::new(AtomicUsize::new(0));
503 let total_chunks2 = total_chunks.clone();
504 let known_chunk_count = Arc::new(AtomicUsize::new(0));
505 let known_chunk_count2 = known_chunk_count.clone();
506
507 let stream_len = Arc::new(AtomicUsize::new(0));
508 let stream_len2 = stream_len.clone();
509 let reused_len = Arc::new(AtomicUsize::new(0));
510 let reused_len2 = reused_len.clone();
511
512 let append_chunk_path = format!("{}_index", prefix);
513 let upload_chunk_path = format!("{}_chunk", prefix);
514 let is_fixed_chunk_size = prefix == "fixed";
515
516 let (upload_queue, upload_result) =
517 Self::append_chunk_queue(h2.clone(), wid, append_chunk_path.to_owned(), verbose);
518
519 let start_time = std::time::Instant::now();
520
521 let index_csum = Arc::new(Mutex::new(Some(openssl::sha::Sha256::new())));
522 let index_csum_2 = index_csum.clone();
523
524 stream
525 .and_then(move |data| {
526
527 let chunk_len = data.len();
528
529 total_chunks.fetch_add(1, Ordering::SeqCst);
530 let offset = stream_len.fetch_add(chunk_len, Ordering::SeqCst) as u64;
531
532 let mut chunk_builder = DataChunkBuilder::new(data.as_ref())
533 .compress(compress);
534
535 if let Some(ref crypt_config) = crypt_config {
536 chunk_builder = chunk_builder.crypt_config(crypt_config);
537 }
538
539 let mut known_chunks = known_chunks.lock().unwrap();
540 let digest = chunk_builder.digest();
541
542 let mut guard = index_csum.lock().unwrap();
543 let csum = guard.as_mut().unwrap();
544
545 let chunk_end = offset + chunk_len as u64;
546
547 if !is_fixed_chunk_size { csum.update(&chunk_end.to_le_bytes()); }
548 csum.update(digest);
549
550 let chunk_is_known = known_chunks.contains(digest);
551 if chunk_is_known {
552 known_chunk_count.fetch_add(1, Ordering::SeqCst);
553 reused_len.fetch_add(chunk_len, Ordering::SeqCst);
554 future::ok(MergedChunkInfo::Known(vec![(offset, *digest)]))
555 } else {
556 known_chunks.insert(*digest);
557 future::ready(chunk_builder
558 .build()
559 .map(move |(chunk, digest)| MergedChunkInfo::New(ChunkInfo {
560 chunk,
561 digest,
562 chunk_len: chunk_len as u64,
563 offset,
564 }))
565 )
566 }
567 })
568 .merge_known_chunks()
569 .try_for_each(move |merged_chunk_info| {
570
571 if let MergedChunkInfo::New(chunk_info) = merged_chunk_info {
572 let offset = chunk_info.offset;
573 let digest = chunk_info.digest;
574 let digest_str = digest_to_hex(&digest);
575
576 if false && verbose { // TO verbose, needs finer verbosity setting granularity
577 println!("upload new chunk {} ({} bytes, offset {})", digest_str,
578 chunk_info.chunk_len, offset);
579 }
580
581 let chunk_data = chunk_info.chunk.into_inner();
582 let param = json!({
583 "wid": wid,
584 "digest": digest_str,
585 "size": chunk_info.chunk_len,
586 "encoded-size": chunk_data.len(),
587 });
588
589 let ct = "application/octet-stream";
590 let request = H2Client::request_builder("localhost", "POST", &upload_chunk_path, Some(param), Some(ct)).unwrap();
591 let upload_data = Some(bytes::Bytes::from(chunk_data));
592
593 let new_info = MergedChunkInfo::Known(vec![(offset, digest)]);
594
595 let mut upload_queue = upload_queue.clone();
596 future::Either::Left(h2
597 .send_request(request, upload_data)
598 .and_then(move |response| async move {
599 upload_queue
600 .send((new_info, Some(response)))
601 .await
602 .map_err(|err| format_err!("failed to send to upload queue: {}", err))
603 })
604 )
605 } else {
606 let mut upload_queue = upload_queue.clone();
607 future::Either::Right(async move {
608 upload_queue
609 .send((merged_chunk_info, None))
610 .await
611 .map_err(|err| format_err!("failed to send to upload queue: {}", err))
612 })
613 }
614 })
615 .then(move |result| async move {
616 upload_result.await?.and(result)
617 }.boxed())
618 .and_then(move |_| {
619 let duration = start_time.elapsed();
620 let total_chunks = total_chunks2.load(Ordering::SeqCst);
621 let known_chunk_count = known_chunk_count2.load(Ordering::SeqCst);
622 let stream_len = stream_len2.load(Ordering::SeqCst);
623 let reused_len = reused_len2.load(Ordering::SeqCst);
624
625 let mut guard = index_csum_2.lock().unwrap();
626 let csum = guard.take().unwrap().finish();
627
628 futures::future::ok((total_chunks, known_chunk_count, stream_len, reused_len, duration, csum))
629 })
630 }
631
632 /// Upload speed test - prints result ot stderr
633 pub async fn upload_speedtest(&self, verbose: bool) -> Result<f64, Error> {
634
635 let mut data = vec![];
636 // generate pseudo random byte sequence
637 for i in 0..1024*1024 {
638 for j in 0..4 {
639 let byte = ((i >> (j<<3))&0xff) as u8;
640 data.push(byte);
641 }
642 }
643
644 let item_len = data.len();
645
646 let mut repeat = 0;
647
648 let (upload_queue, upload_result) = Self::response_queue(verbose);
649
650 let start_time = std::time::Instant::now();
651
652 loop {
653 repeat += 1;
654 if start_time.elapsed().as_secs() >= 5 {
655 break;
656 }
657
658 let mut upload_queue = upload_queue.clone();
659
660 if verbose { eprintln!("send test data ({} bytes)", data.len()); }
661 let request = H2Client::request_builder("localhost", "POST", "speedtest", None, None).unwrap();
662 let request_future = self.h2.send_request(request, Some(bytes::Bytes::from(data.clone()))).await?;
663
664 upload_queue.send(request_future).await?;
665 }
666
667 drop(upload_queue); // close queue
668
669 let _ = upload_result.await?;
670
671 eprintln!("Uploaded {} chunks in {} seconds.", repeat, start_time.elapsed().as_secs());
672 let speed = ((item_len*(repeat as usize)) as f64)/start_time.elapsed().as_secs_f64();
673 eprintln!("Time per request: {} microseconds.", (start_time.elapsed().as_micros())/(repeat as u128));
674
675 Ok(speed)
676 }
677 }