]> git.proxmox.com Git - proxmox-backup.git/blame - pbs-client/src/backup_writer.rs
clippy 1.65 fixes
[proxmox-backup.git] / pbs-client / src / backup_writer.rs
CommitLineData
cf9271e2 1use std::collections::HashSet;
be3a0295 2use std::future::Future;
b957aa81 3use std::os::unix::fs::OpenOptionsExt;
924373d2 4use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
cf9271e2
DM
5use std::sync::{Arc, Mutex};
6
f28d9088 7use anyhow::{bail, format_err, Error};
be3a0295
WB
8use futures::future::{self, AbortHandle, Either, FutureExt, TryFutureExt};
9use futures::stream::{Stream, StreamExt, TryStreamExt};
cf9271e2
DM
10use serde_json::{json, Value};
11use tokio::io::AsyncReadExt;
12use tokio::sync::{mpsc, oneshot};
7c667013 13use tokio_stream::wrappers::ReceiverStream;
cf9271e2 14
133d718f 15use pbs_api_types::{BackupDir, BackupNamespace, HumanByte};
4805edc4
WB
16use pbs_datastore::data_blob::{ChunkInfo, DataBlob, DataChunkBuilder};
17use pbs_datastore::dynamic_index::DynamicIndexReader;
18use pbs_datastore::fixed_index::FixedIndexReader;
19use pbs_datastore::index::IndexFile;
20use pbs_datastore::manifest::{ArchiveType, BackupManifest, MANIFEST_BLOB_NAME};
bdfa6370
TL
21use pbs_datastore::{CATALOG_NAME, PROXMOX_BACKUP_PROTOCOL_ID_V1};
22use pbs_tools::crypt_config::CryptConfig;
770a36e5 23
ef6d4967 24use super::merge_known_chunks::{MergeKnownChunks, MergedChunkInfo};
cf9271e2 25
ef6d4967 26use super::{H2Client, HttpClient};
cf9271e2
DM
27
/// Client-side handle for a single backup session with the server.
///
/// Created by `BackupWriter::start`; all upload and download helpers of the
/// session send their requests through the wrapped HTTP/2 client.
pub struct BackupWriter {
    /// HTTP/2 client that every request of this writer is sent through.
    h2: H2Client,
    /// Abort handle obtained together with the H2 connection; it is triggered
    /// on drop, by `cancel()` and after a successful `finish()`.
    abort: AbortHandle,
    /// Optional crypt config, used when an upload requests encryption and when
    /// decoding the manifest of the previous backup.
    crypt_config: Option<Arc<CryptConfig>>,
}
33
34impl Drop for BackupWriter {
cf9271e2 35 fn drop(&mut self) {
dc089345 36 self.abort.abort();
cf9271e2
DM
37 }
38}
39
/// Result of a finished upload: how much was uploaded and its checksum.
#[derive(Debug, Clone)]
pub struct BackupStats {
    /// Size in bytes: the encoded size for blobs, the total stream length for
    /// chunked archives.
    pub size: u64,
    /// SHA-256 checksum: of the uploaded raw data for blobs, of the index for
    /// chunked archives.
    pub csum: [u8; 32],
}
44
e43b9175
FG
/// Options for uploading blobs/streams to the server
#[derive(Default, Clone)]
pub struct UploadOptions {
    /// Manifest of the previous backup. When set, `upload_stream` tries to
    /// download the previous index of the archive to pre-fill the list of
    /// chunks that are already known.
    pub previous_manifest: Option<Arc<BackupManifest>>,
    /// Passed on to the blob/chunk encoder to request compression.
    pub compress: bool,
    /// Request encryption; the upload fails if the writer has no crypt config.
    pub encrypt: bool,
    /// When set, a stream is uploaded as "fixed" index and this value is sent
    /// as its `size`; otherwise the stream is uploaded as "dynamic" index.
    pub fixed_size: Option<u64>,
}
53
3b60b509
DC
/// Counters collected by `upload_chunk_info_stream` while uploading one archive.
struct UploadStats {
    /// Total number of chunks seen in the stream.
    chunk_count: usize,
    /// Number of chunks that were already known and therefore not uploaded.
    chunk_reused: usize,
    /// Total number of payload bytes seen in the stream.
    size: usize,
    /// Number of payload bytes that belong to already known chunks.
    size_reused: usize,
    /// Sum of the raw (encoded) sizes of all newly built chunks.
    size_compressed: usize,
    /// Wall-clock time the upload took.
    duration: std::time::Duration,
    /// SHA-256 checksum over the index entries (chunk end offsets for
    /// non-fixed indexes, and digests).
    csum: [u8; 32],
}
63
8db14689
WB
/// Sending half of the append-chunk queue: a chunk info plus, for freshly
/// uploaded chunks, the pending response of the upload request.
type UploadQueueSender = mpsc::Sender<(MergedChunkInfo, Option<h2::client::ResponseFuture>)>;
/// Receives the final result of the background task draining the queue.
type UploadResultReceiver = oneshot::Receiver<Result<(), Error>>;
66
cf9271e2 67impl BackupWriter {
e10fccf5 68 fn new(h2: H2Client, abort: AbortHandle, crypt_config: Option<Arc<CryptConfig>>) -> Arc<Self> {
ef6d4967
TL
69 Arc::new(Self {
70 h2,
71 abort,
72 crypt_config,
ef6d4967 73 })
cf9271e2
DM
74 }
75
367c0ff7
FG
76 // FIXME: extract into (flattened) parameter struct?
77 #[allow(clippy::too_many_arguments)]
cf9271e2
DM
78 pub async fn start(
79 client: HttpClient,
b957aa81 80 crypt_config: Option<Arc<CryptConfig>>,
cf9271e2 81 datastore: &str,
133d718f 82 ns: &BackupNamespace,
8c74349b 83 backup: &BackupDir,
cf9271e2 84 debug: bool,
ef6d4967 85 benchmark: bool,
cf9271e2 86 ) -> Result<Arc<BackupWriter>, Error> {
c18d481f 87 let mut param = json!({
8c74349b
WB
88 "backup-type": backup.ty(),
89 "backup-id": backup.id(),
90 "backup-time": backup.time,
cf9271e2 91 "store": datastore,
61d7b501
HL
92 "debug": debug,
93 "benchmark": benchmark
cf9271e2
DM
94 });
95
c18d481f 96 if !ns.is_root() {
bc21ade2 97 param["ns"] = serde_json::to_value(ns)?;
c18d481f
WB
98 }
99
cf9271e2 100 let req = HttpClient::request_builder(
ef6d4967
TL
101 client.server(),
102 client.port(),
103 "GET",
104 "/api2/json/backup",
105 Some(param),
106 )
107 .unwrap();
108
109 let (h2, abort) = client
110 .start_h2_connection(req, String::from(PROXMOX_BACKUP_PROTOCOL_ID_V1!()))
111 .await?;
cf9271e2 112
e10fccf5 113 Ok(BackupWriter::new(h2, abort, crypt_config))
cf9271e2
DM
114 }
115
ef6d4967 116 pub async fn get(&self, path: &str, param: Option<Value>) -> Result<Value, Error> {
cf9271e2
DM
117 self.h2.get(path, param).await
118 }
119
ef6d4967 120 pub async fn put(&self, path: &str, param: Option<Value>) -> Result<Value, Error> {
cf9271e2
DM
121 self.h2.put(path, param).await
122 }
123
ef6d4967 124 pub async fn post(&self, path: &str, param: Option<Value>) -> Result<Value, Error> {
cf9271e2
DM
125 self.h2.post(path, param).await
126 }
127
128 pub async fn upload_post(
129 &self,
130 path: &str,
131 param: Option<Value>,
132 content_type: &str,
133 data: Vec<u8>,
134 ) -> Result<Value, Error> {
ef6d4967
TL
135 self.h2
136 .upload("POST", path, param, content_type, data)
137 .await
cf9271e2
DM
138 }
139
140 pub async fn send_upload_request(
141 &self,
142 method: &str,
143 path: &str,
144 param: Option<Value>,
145 content_type: &str,
146 data: Vec<u8>,
147 ) -> Result<h2::client::ResponseFuture, Error> {
ef6d4967
TL
148 let request =
149 H2Client::request_builder("localhost", method, path, param, Some(content_type))
150 .unwrap();
151 let response_future = self
152 .h2
153 .send_request(request, Some(bytes::Bytes::from(data.clone())))
154 .await?;
cf9271e2
DM
155 Ok(response_future)
156 }
157
158 pub async fn upload_put(
159 &self,
160 path: &str,
161 param: Option<Value>,
162 content_type: &str,
163 data: Vec<u8>,
164 ) -> Result<Value, Error> {
165 self.h2.upload("PUT", path, param, content_type, data).await
166 }
167
168 pub async fn finish(self: Arc<Self>) -> Result<(), Error> {
169 let h2 = self.h2.clone();
170
171 h2.post("finish", None)
172 .map_ok(move |_| {
dc089345 173 self.abort.abort();
cf9271e2
DM
174 })
175 .await
176 }
177
e016f9ff 178 pub fn cancel(&self) {
dc089345 179 self.abort.abort();
cf9271e2
DM
180 }
181
182 pub async fn upload_blob<R: std::io::Read>(
183 &self,
184 mut reader: R,
185 file_name: &str,
ef6d4967 186 ) -> Result<BackupStats, Error> {
cf9271e2
DM
187 let mut raw_data = Vec::new();
188 // fixme: avoid loading into memory
189 reader.read_to_end(&mut raw_data)?;
190
191 let csum = openssl::sha::sha256(&raw_data);
192 let param = json!({"encoded-size": raw_data.len(), "file-name": file_name });
193 let size = raw_data.len() as u64;
ef6d4967
TL
194 let _value = self
195 .h2
196 .upload(
197 "POST",
198 "blob",
199 Some(param),
200 "application/octet-stream",
201 raw_data,
202 )
203 .await?;
cf9271e2
DM
204 Ok(BackupStats { size, csum })
205 }
206
207 pub async fn upload_blob_from_data(
208 &self,
209 data: Vec<u8>,
210 file_name: &str,
e43b9175 211 options: UploadOptions,
f28d9088 212 ) -> Result<BackupStats, Error> {
e43b9175 213 let blob = match (options.encrypt, &self.crypt_config) {
ef6d4967
TL
214 (false, _) => DataBlob::encode(&data, None, options.compress)?,
215 (true, None) => bail!("requested encryption without a crypt config"),
216 (true, Some(crypt_config)) => {
217 DataBlob::encode(&data, Some(crypt_config), options.compress)?
218 }
cf9271e2
DM
219 };
220
221 let raw_data = blob.into_inner();
222 let size = raw_data.len() as u64;
223
224 let csum = openssl::sha::sha256(&raw_data);
225 let param = json!({"encoded-size": size, "file-name": file_name });
ef6d4967
TL
226 let _value = self
227 .h2
228 .upload(
229 "POST",
230 "blob",
231 Some(param),
232 "application/octet-stream",
233 raw_data,
234 )
235 .await?;
cf9271e2
DM
236 Ok(BackupStats { size, csum })
237 }
238
239 pub async fn upload_blob_from_file<P: AsRef<std::path::Path>>(
240 &self,
241 src_path: P,
242 file_name: &str,
e43b9175 243 options: UploadOptions,
3638341a 244 ) -> Result<BackupStats, Error> {
cf9271e2
DM
245 let src_path = src_path.as_ref();
246
247 let mut file = tokio::fs::File::open(src_path)
248 .await
249 .map_err(|err| format_err!("unable to open file {:?} - {}", src_path, err))?;
250
251 let mut contents = Vec::new();
252
253 file.read_to_end(&mut contents)
254 .await
255 .map_err(|err| format_err!("unable to read file {:?} - {}", src_path, err))?;
256
ef6d4967
TL
257 self.upload_blob_from_data(contents, file_name, options)
258 .await
cf9271e2
DM
259 }
260
261 pub async fn upload_stream(
262 &self,
263 archive_name: &str,
264 stream: impl Stream<Item = Result<bytes::BytesMut, Error>>,
e43b9175 265 options: UploadOptions,
cf9271e2
DM
266 ) -> Result<BackupStats, Error> {
267 let known_chunks = Arc::new(Mutex::new(HashSet::new()));
268
269 let mut param = json!({ "archive-name": archive_name });
e43b9175 270 let prefix = if let Some(size) = options.fixed_size {
cf9271e2 271 param["size"] = size.into();
e43b9175
FG
272 "fixed"
273 } else {
274 "dynamic"
275 };
cf9271e2 276
e43b9175 277 if options.encrypt && self.crypt_config.is_none() {
3638341a
DM
278 bail!("requested encryption without a crypt config");
279 }
280
cf9271e2
DM
281 let index_path = format!("{}_index", prefix);
282 let close_path = format!("{}_close", prefix);
283
e43b9175 284 if let Some(manifest) = options.previous_manifest {
b957aa81 285 // try, but ignore errors
4805edc4 286 match ArchiveType::from_path(archive_name) {
b957aa81 287 Ok(ArchiveType::FixedIndex) => {
7c22932c 288 if let Err(err) = self
ef6d4967
TL
289 .download_previous_fixed_index(
290 archive_name,
291 &manifest,
292 known_chunks.clone(),
293 )
7c22932c
DC
294 .await
295 {
296 eprintln!("Error downloading .fidx from previous manifest: {}", err);
297 }
b957aa81
DM
298 }
299 Ok(ArchiveType::DynamicIndex) => {
7c22932c 300 if let Err(err) = self
ef6d4967
TL
301 .download_previous_dynamic_index(
302 archive_name,
303 &manifest,
304 known_chunks.clone(),
305 )
7c22932c
DC
306 .await
307 {
308 eprintln!("Error downloading .didx from previous manifest: {}", err);
309 }
b957aa81
DM
310 }
311 _ => { /* do nothing */ }
312 }
313 }
cf9271e2 314
ef6d4967
TL
315 let wid = self
316 .h2
317 .post(&index_path, Some(param))
318 .await?
319 .as_u64()
320 .unwrap();
cf9271e2 321
3b60b509
DC
322 let upload_stats = Self::upload_chunk_info_stream(
323 self.h2.clone(),
324 wid,
325 stream,
9a37bd6c 326 prefix,
3b60b509
DC
327 known_chunks.clone(),
328 if options.encrypt {
329 self.crypt_config.clone()
330 } else {
331 None
332 },
333 options.compress,
3b60b509
DC
334 )
335 .await?;
cf9271e2 336
924373d2
DC
337 let size_dirty = upload_stats.size - upload_stats.size_reused;
338 let size: HumanByte = upload_stats.size.into();
e10fccf5 339 let archive = if log::log_enabled!(log::Level::Debug) {
d7eedbd2 340 archive_name
6e1deb15 341 } else {
770a36e5 342 pbs_tools::format::strip_server_file_extension(archive_name)
6e1deb15 343 };
e10fccf5 344
6e1deb15 345 if archive_name != CATALOG_NAME {
ef6d4967 346 let speed: HumanByte =
924373d2
DC
347 ((size_dirty * 1_000_000) / (upload_stats.duration.as_micros() as usize)).into();
348 let size_dirty: HumanByte = size_dirty.into();
349 let size_compressed: HumanByte = upload_stats.size_compressed.into();
e10fccf5 350 log::info!(
924373d2 351 "{}: had to backup {} of {} (compressed {}) in {:.2}s",
ef6d4967 352 archive,
924373d2
DC
353 size_dirty,
354 size,
355 size_compressed,
356 upload_stats.duration.as_secs_f64()
ef6d4967 357 );
e10fccf5 358 log::info!("{}: average backup speed: {}/s", archive, speed);
6e1deb15 359 } else {
e10fccf5 360 log::info!("Uploaded backup catalog ({})", size);
6e1deb15
TL
361 }
362
3b60b509
DC
363 if upload_stats.size_reused > 0 && upload_stats.size > 1024 * 1024 {
364 let reused_percent = upload_stats.size_reused as f64 * 100. / upload_stats.size as f64;
365 let reused: HumanByte = upload_stats.size_reused.into();
e10fccf5 366 log::info!(
ef6d4967 367 "{}: backup was done incrementally, reused {} ({:.1}%)",
e10fccf5
HL
368 archive,
369 reused,
370 reused_percent
ef6d4967 371 );
6e1deb15 372 }
e10fccf5
HL
373 if log::log_enabled!(log::Level::Debug) && upload_stats.chunk_count > 0 {
374 log::debug!(
ef6d4967 375 "{}: Reused {} from {} chunks.",
e10fccf5
HL
376 archive,
377 upload_stats.chunk_reused,
378 upload_stats.chunk_count
ef6d4967 379 );
e10fccf5 380 log::debug!(
ef6d4967
TL
381 "{}: Average chunk size was {}.",
382 archive,
3b60b509 383 HumanByte::from(upload_stats.size / upload_stats.chunk_count)
ef6d4967 384 );
e10fccf5 385 log::debug!(
ef6d4967
TL
386 "{}: Average time per request: {} microseconds.",
387 archive,
3b60b509 388 (upload_stats.duration.as_micros()) / (upload_stats.chunk_count as u128)
ef6d4967 389 );
6da73c82
DM
390 }
391
cf9271e2
DM
392 let param = json!({
393 "wid": wid ,
3b60b509
DC
394 "chunk-count": upload_stats.chunk_count,
395 "size": upload_stats.size,
16f6766a 396 "csum": hex::encode(upload_stats.csum),
cf9271e2
DM
397 });
398 let _value = self.h2.post(&close_path, Some(param)).await?;
399 Ok(BackupStats {
3b60b509
DC
400 size: upload_stats.size as u64,
401 csum: upload_stats.csum,
cf9271e2
DM
402 })
403 }
404
e10fccf5 405 fn response_queue() -> (
cf9271e2 406 mpsc::Sender<h2::client::ResponseFuture>,
ef6d4967 407 oneshot::Receiver<Result<(), Error>>,
cf9271e2
DM
408 ) {
409 let (verify_queue_tx, verify_queue_rx) = mpsc::channel(100);
410 let (verify_result_tx, verify_result_rx) = oneshot::channel();
411
db0cb9ce
WB
412 // FIXME: check if this works as expected as replacement for the combinator below?
413 // tokio::spawn(async move {
414 // let result: Result<(), Error> = (async move {
415 // while let Some(response) = verify_queue_rx.recv().await {
416 // match H2Client::h2api_response(response.await?).await {
417 // Ok(result) => println!("RESPONSE: {:?}", result),
418 // Err(err) => bail!("pipelined request failed: {}", err),
419 // }
420 // }
421 // Ok(())
422 // }).await;
423 // let _ignore_closed_channel = verify_result_tx.send(result);
424 // });
425 // old code for reference?
426 tokio::spawn(
7c667013 427 ReceiverStream::new(verify_queue_rx)
cf9271e2 428 .map(Ok::<_, Error>)
323b2f3d 429 .try_for_each(move |response: h2::client::ResponseFuture| {
cf9271e2
DM
430 response
431 .map_err(Error::from)
432 .and_then(H2Client::h2api_response)
e10fccf5 433 .map_ok(move |result| log::debug!("RESPONSE: {:?}", result))
cf9271e2
DM
434 .map_err(|err| format_err!("pipelined request failed: {}", err))
435 })
436 .map(|result| {
ef6d4967
TL
437 let _ignore_closed_channel = verify_result_tx.send(result);
438 }),
cf9271e2
DM
439 );
440
441 (verify_queue_tx, verify_result_rx)
442 }
443
8db14689
WB
/// Create the queue used to append uploaded/known chunks to index writer `wid`.
///
/// Returns the sender for `(MergedChunkInfo, Option<ResponseFuture>)` items
/// (channel capacity 64) and a receiver for the final result of the spawned
/// background task. For each item the task first waits for the upload
/// response, if there is one, then merges consecutive known chunks and sends
/// their digests and offsets with a `PUT` request to `path`.
///
/// Only `MergedChunkInfo::Known` items may be queued; anything else hits
/// `unreachable!()`.
fn append_chunk_queue(
    h2: H2Client,
    wid: u64,
    path: String,
) -> (UploadQueueSender, UploadResultReceiver) {
    let (verify_queue_tx, verify_queue_rx) = mpsc::channel(64);
    let (verify_result_tx, verify_result_rx) = oneshot::channel();

    // FIXME: async-block-ify this code!
    tokio::spawn(
        ReceiverStream::new(verify_queue_rx)
            .map(Ok::<_, Error>)
            // step 1: make sure the chunk upload (if any) was accepted by the server
            .and_then(move |(merged_chunk_info, response): (MergedChunkInfo, Option<h2::client::ResponseFuture>)| {
                match (response, merged_chunk_info) {
                    (Some(response), MergedChunkInfo::Known(list)) => {
                        Either::Left(
                            response
                                .map_err(Error::from)
                                .and_then(H2Client::h2api_response)
                                .and_then(move |_result| {
                                    future::ok(MergedChunkInfo::Known(list))
                                })
                        )
                    }
                    (None, MergedChunkInfo::Known(list)) => {
                        Either::Right(future::ok(MergedChunkInfo::Known(list)))
                    }
                    _ => unreachable!(),
                }
            })
            // step 2: combine known chunks into larger lists
            .merge_known_chunks()
            // step 3: append each list to the index with a single request
            .and_then(move |merged_chunk_info| {
                match merged_chunk_info {
                    MergedChunkInfo::Known(chunk_list) => {
                        let mut digest_list = vec![];
                        let mut offset_list = vec![];
                        for (offset, digest) in chunk_list {
                            digest_list.push(hex::encode(digest));
                            offset_list.push(offset);
                        }
                        log::debug!("append chunks list len ({})", digest_list.len());
                        let param = json!({ "wid": wid, "digest-list": digest_list, "offset-list": offset_list });
                        let request = H2Client::request_builder("localhost", "PUT", &path, None, Some("application/json")).unwrap();
                        // the parameters are sent as JSON request body
                        let param_data = bytes::Bytes::from(param.to_string().into_bytes());
                        let upload_data = Some(param_data);
                        h2.send_request(request, upload_data)
                            .and_then(move |response| {
                                response
                                    .map_err(Error::from)
                                    .and_then(H2Client::h2api_response)
                                    .map_ok(|_| ())
                            })
                            .map_err(|err| format_err!("pipelined request failed: {}", err))
                    }
                    _ => unreachable!(),
                }
            })
            .try_for_each(|_| future::ok(()))
            .map(|result| {
                // the receiver may already be gone, which is fine
                let _ignore_closed_channel = verify_result_tx.send(result);
            })
    );

    (verify_queue_tx, verify_result_rx)
}
509
b957aa81 510 pub async fn download_previous_fixed_index(
cf9271e2 511 &self,
cf9271e2 512 archive_name: &str,
b957aa81 513 manifest: &BackupManifest,
ef6d4967 514 known_chunks: Arc<Mutex<HashSet<[u8; 32]>>>,
b957aa81 515 ) -> Result<FixedIndexReader, Error> {
b957aa81
DM
516 let mut tmpfile = std::fs::OpenOptions::new()
517 .write(true)
518 .read(true)
519 .custom_flags(libc::O_TMPFILE)
520 .open("/tmp")?;
cf9271e2 521
b957aa81 522 let param = json!({ "archive-name": archive_name });
ef6d4967
TL
523 self.h2
524 .download("previous", Some(param), &mut tmpfile)
525 .await?;
b957aa81 526
ef6d4967
TL
527 let index = FixedIndexReader::new(tmpfile).map_err(|err| {
528 format_err!("unable to read fixed index '{}' - {}", archive_name, err)
529 })?;
b957aa81
DM
530 // Note: do not use values stored in index (not trusted) - instead, computed them again
531 let (csum, size) = index.compute_csum();
532 manifest.verify_file(archive_name, &csum, size)?;
533
534 // add index chunks to known chunks
535 let mut known_chunks = known_chunks.lock().unwrap();
536 for i in 0..index.index_count() {
537 known_chunks.insert(*index.index_digest(i).unwrap());
538 }
cf9271e2 539
e10fccf5
HL
540 log::debug!(
541 "{}: known chunks list length is {}",
542 archive_name,
543 index.index_count()
544 );
cf9271e2 545
b957aa81
DM
546 Ok(index)
547 }
548
549 pub async fn download_previous_dynamic_index(
550 &self,
551 archive_name: &str,
552 manifest: &BackupManifest,
ef6d4967 553 known_chunks: Arc<Mutex<HashSet<[u8; 32]>>>,
b957aa81 554 ) -> Result<DynamicIndexReader, Error> {
b957aa81
DM
555 let mut tmpfile = std::fs::OpenOptions::new()
556 .write(true)
557 .read(true)
558 .custom_flags(libc::O_TMPFILE)
559 .open("/tmp")?;
cf9271e2 560
b957aa81 561 let param = json!({ "archive-name": archive_name });
ef6d4967
TL
562 self.h2
563 .download("previous", Some(param), &mut tmpfile)
564 .await?;
b957aa81 565
ef6d4967
TL
566 let index = DynamicIndexReader::new(tmpfile).map_err(|err| {
567 format_err!("unable to read dynmamic index '{}' - {}", archive_name, err)
568 })?;
b957aa81
DM
569 // Note: do not use values stored in index (not trusted) - instead, computed them again
570 let (csum, size) = index.compute_csum();
571 manifest.verify_file(archive_name, &csum, size)?;
572
573 // add index chunks to known chunks
574 let mut known_chunks = known_chunks.lock().unwrap();
575 for i in 0..index.index_count() {
576 known_chunks.insert(*index.index_digest(i).unwrap());
cf9271e2
DM
577 }
578
e10fccf5
HL
579 log::debug!(
580 "{}: known chunks list length is {}",
581 archive_name,
582 index.index_count()
583 );
ee5fe978 584
b957aa81
DM
585 Ok(index)
586 }
587
8b7f8d3f
FG
588 /// Retrieve backup time of last backup
589 pub async fn previous_backup_time(&self) -> Result<Option<i64>, Error> {
590 let data = self.h2.get("previous_backup_time", None).await?;
ef6d4967
TL
591 serde_json::from_value(data).map_err(|err| {
592 format_err!(
593 "Failed to parse backup time value returned by server - {}",
594 err
595 )
596 })
8b7f8d3f
FG
597 }
598
b957aa81
DM
599 /// Download backup manifest (index.json) of last backup
600 pub async fn download_previous_manifest(&self) -> Result<BackupManifest, Error> {
b957aa81
DM
601 let mut raw_data = Vec::with_capacity(64 * 1024);
602
603 let param = json!({ "archive-name": MANIFEST_BLOB_NAME });
ef6d4967
TL
604 self.h2
605 .download("previous", Some(param), &mut raw_data)
606 .await?;
b957aa81 607
39f18b30 608 let blob = DataBlob::load_from_reader(&mut &raw_data[..])?;
8819d1f2
FG
609 // no expected digest available
610 let data = blob.decode(self.crypt_config.as_ref().map(Arc::as_ref), None)?;
3dacedce 611
ef6d4967
TL
612 let manifest =
613 BackupManifest::from_data(&data[..], self.crypt_config.as_ref().map(Arc::as_ref))?;
b957aa81
DM
614
615 Ok(manifest)
cf9271e2
DM
616 }
617
/// Upload all chunks of `stream` for index writer `wid` and collect statistics.
///
/// Every stream item becomes one chunk. Chunks whose digest is already in
/// `known_chunks` are only referenced; new chunks are built (compressed and
/// encrypted according to `compress` and `crypt_config`), uploaded to
/// `<prefix>_chunk` and then appended to the index through the append queue
/// at `<prefix>_index`. While processing, the index checksum is computed:
/// for every chunk the end offset (skipped when `prefix` is `"fixed"`) and
/// the digest are hashed.
///
/// The returned future resolves to the `UploadStats` once the stream is
/// exhausted and the append queue task reported its result.
// We have no `self` here for `h2` and `verbose`, the only other arg "common" with 1 other
// function in the same path is `wid`, so those 3 could be in a struct, but there's no real use
// since this is a private method.
#[allow(clippy::too_many_arguments)]
fn upload_chunk_info_stream(
    h2: H2Client,
    wid: u64,
    stream: impl Stream<Item = Result<bytes::BytesMut, Error>>,
    prefix: &str,
    known_chunks: Arc<Mutex<HashSet<[u8; 32]>>>,
    crypt_config: Option<Arc<CryptConfig>>,
    compress: bool,
) -> impl Future<Output = Result<UploadStats, Error>> {
    // Each counter exists twice: one handle is moved into the stream closure,
    // the second one is read when building the final statistics.
    let total_chunks = Arc::new(AtomicUsize::new(0));
    let total_chunks2 = total_chunks.clone();
    let known_chunk_count = Arc::new(AtomicUsize::new(0));
    let known_chunk_count2 = known_chunk_count.clone();

    let stream_len = Arc::new(AtomicUsize::new(0));
    let stream_len2 = stream_len.clone();
    let compressed_stream_len = Arc::new(AtomicU64::new(0));
    let compressed_stream_len2 = compressed_stream_len.clone();
    let reused_len = Arc::new(AtomicUsize::new(0));
    let reused_len2 = reused_len.clone();

    let append_chunk_path = format!("{}_index", prefix);
    let upload_chunk_path = format!("{}_chunk", prefix);
    let is_fixed_chunk_size = prefix == "fixed";

    let (upload_queue, upload_result) =
        Self::append_chunk_queue(h2.clone(), wid, append_chunk_path);

    let start_time = std::time::Instant::now();

    // wrapped in an Option so the hasher can be taken out and finished at the end
    let index_csum = Arc::new(Mutex::new(Some(openssl::sha::Sha256::new())));
    let index_csum_2 = index_csum.clone();

    stream
        .and_then(move |data| {
            let chunk_len = data.len();

            total_chunks.fetch_add(1, Ordering::SeqCst);
            // fetch_add returns the previous value, i.e. the start offset of this chunk
            let offset = stream_len.fetch_add(chunk_len, Ordering::SeqCst) as u64;

            let mut chunk_builder = DataChunkBuilder::new(data.as_ref()).compress(compress);

            if let Some(ref crypt_config) = crypt_config {
                chunk_builder = chunk_builder.crypt_config(crypt_config);
            }

            let mut known_chunks = known_chunks.lock().unwrap();
            let digest = chunk_builder.digest();

            let mut guard = index_csum.lock().unwrap();
            let csum = guard.as_mut().unwrap();

            let chunk_end = offset + chunk_len as u64;

            // the end offset is only part of the checksum for non-fixed indexes
            if !is_fixed_chunk_size {
                csum.update(&chunk_end.to_le_bytes());
            }
            csum.update(digest);

            let chunk_is_known = known_chunks.contains(digest);
            if chunk_is_known {
                known_chunk_count.fetch_add(1, Ordering::SeqCst);
                reused_len.fetch_add(chunk_len, Ordering::SeqCst);
                future::ok(MergedChunkInfo::Known(vec![(offset, *digest)]))
            } else {
                let compressed_stream_len2 = compressed_stream_len.clone();
                // remember the digest, so later identical chunks are not built and uploaded again
                known_chunks.insert(*digest);
                future::ready(chunk_builder.build().map(move |(chunk, digest)| {
                    compressed_stream_len2.fetch_add(chunk.raw_size(), Ordering::SeqCst);
                    MergedChunkInfo::New(ChunkInfo {
                        chunk,
                        digest,
                        chunk_len: chunk_len as u64,
                        offset,
                    })
                }))
            }
        })
        .merge_known_chunks()
        .try_for_each(move |merged_chunk_info| {
            let upload_queue = upload_queue.clone();

            if let MergedChunkInfo::New(chunk_info) = merged_chunk_info {
                let offset = chunk_info.offset;
                let digest = chunk_info.digest;
                let digest_str = hex::encode(digest);

                log::trace!(
                    "upload new chunk {} ({} bytes, offset {})",
                    digest_str,
                    chunk_info.chunk_len,
                    offset
                );

                let chunk_data = chunk_info.chunk.into_inner();
                let param = json!({
                    "wid": wid,
                    "digest": digest_str,
                    "size": chunk_info.chunk_len,
                    "encoded-size": chunk_data.len(),
                });

                let ct = "application/octet-stream";
                let request = H2Client::request_builder(
                    "localhost",
                    "POST",
                    &upload_chunk_path,
                    Some(param),
                    Some(ct),
                )
                .unwrap();
                let upload_data = Some(bytes::Bytes::from(chunk_data));

                // once sent, the chunk is queued as "known" together with its pending response
                let new_info = MergedChunkInfo::Known(vec![(offset, digest)]);

                Either::Left(h2.send_request(request, upload_data).and_then(
                    move |response| async move {
                        upload_queue
                            .send((new_info, Some(response)))
                            .await
                            .map_err(|err| {
                                format_err!("failed to send to upload queue: {}", err)
                            })
                    },
                ))
            } else {
                // already known chunks are queued without a pending response
                Either::Right(async move {
                    upload_queue
                        .send((merged_chunk_info, None))
                        .await
                        .map_err(|err| format_err!("failed to send to upload queue: {}", err))
                })
            }
        })
        // wait for the append queue task; its error takes precedence over the stream result
        .then(move |result| async move { upload_result.await?.and(result) }.boxed())
        .and_then(move |_| {
            let duration = start_time.elapsed();
            let chunk_count = total_chunks2.load(Ordering::SeqCst);
            let chunk_reused = known_chunk_count2.load(Ordering::SeqCst);
            let size = stream_len2.load(Ordering::SeqCst);
            let size_reused = reused_len2.load(Ordering::SeqCst);
            let size_compressed = compressed_stream_len2.load(Ordering::SeqCst) as usize;

            let mut guard = index_csum_2.lock().unwrap();
            let csum = guard.take().unwrap().finish();

            futures::future::ok(UploadStats {
                chunk_count,
                chunk_reused,
                size,
                size_reused,
                size_compressed,
                duration,
                csum,
            })
        })
}
779
1ffe0301 780 /// Upload speed test - prints result to stderr
e10fccf5 781 pub async fn upload_speedtest(&self) -> Result<f64, Error> {
cf9271e2
DM
782 let mut data = vec![];
783 // generate pseudo random byte sequence
ef6d4967 784 for i in 0..1024 * 1024 {
cf9271e2 785 for j in 0..4 {
ef6d4967 786 let byte = ((i >> (j << 3)) & 0xff) as u8;
cf9271e2
DM
787 data.push(byte);
788 }
789 }
790
791 let item_len = data.len();
792
793 let mut repeat = 0;
794
e10fccf5 795 let (upload_queue, upload_result) = Self::response_queue();
cf9271e2
DM
796
797 let start_time = std::time::Instant::now();
798
799 loop {
800 repeat += 1;
801 if start_time.elapsed().as_secs() >= 5 {
802 break;
803 }
804
e10fccf5 805 log::debug!("send test data ({} bytes)", data.len());
ef6d4967
TL
806 let request =
807 H2Client::request_builder("localhost", "POST", "speedtest", None, None).unwrap();
808 let request_future = self
809 .h2
810 .send_request(request, Some(bytes::Bytes::from(data.clone())))
811 .await?;
cf9271e2
DM
812
813 upload_queue.send(request_future).await?;
814 }
815
816 drop(upload_queue); // close queue
817
818 let _ = upload_result.await?;
819
e10fccf5 820 log::info!(
ef6d4967
TL
821 "Uploaded {} chunks in {} seconds.",
822 repeat,
823 start_time.elapsed().as_secs()
824 );
825 let speed = ((item_len * (repeat as usize)) as f64) / start_time.elapsed().as_secs_f64();
e10fccf5 826 log::info!(
ef6d4967
TL
827 "Time per request: {} microseconds.",
828 (start_time.elapsed().as_micros()) / (repeat as u128)
829 );
cf9271e2
DM
830
831 Ok(speed)
832 }
833}