import 15.2.0 Octopus source

[ceph.git] / ceph / src / zstd / lib / dictBuilder / zdict.c
diff --git a/ceph/src/zstd/lib/dictBuilder/zdict.c b/ceph/src/zstd/lib/dictBuilder/zdict.c

index 1bb8b0683d049194a92e30b287cead96ad9dbf9e..c753da0dbbfe7e1bd6361c1820aecff66e2ea89a 100644 (file)
--- a/ceph/src/zstd/lib/dictBuilder/zdict.c
+++ b/ceph/src/zstd/lib/dictBuilder/zdict.c
@@ -103,7 +103,7 @@ unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
  /*-********************************************************
  *  Dictionary training functions
  **********************************************************/
-static unsigned ZDICT_NbCommonBytes (register size_t val)
+static unsigned ZDICT_NbCommonBytes (size_t val)
  {
      if (MEM_isLittleEndian()) {
          if (MEM_64bits()) {
@@ -207,7 +207,6 @@ static dictItem ZDICT_analyzePos(
      U32 cumulLength[LLIMIT] = {0};
      U32 savings[LLIMIT] = {0};
      const BYTE* b = (const BYTE*)buffer;
-    size_t length;
      size_t maxLength = LLIMIT;
      size_t pos = suffix[start];
      U32 end = start;
@@ -222,26 +221,30 @@ static dictItem ZDICT_analyzePos(
         ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3))
         ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) {
          /* skip and mark segment */
-        U16 u16 = MEM_read16(b+pos+4);
-        U32 u, e = 6;
-        while (MEM_read16(b+pos+e) == u16) e+=2 ;
-        if (b[pos+e] == b[pos+e-1]) e++;
-        for (u=1; u<e; u++)
+        U16 const pattern16 = MEM_read16(b+pos+4);
+        U32 u, patternEnd = 6;
+        while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ;
+        if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++;
+        for (u=1; u<patternEnd; u++)
              doneMarks[pos+u] = 1;
          return solution;
      }
  
      /* look forward */
-    do {
-        end++;
-        length = ZDICT_count(b + pos, b + suffix[end]);
-    } while (length >=MINMATCHLENGTH);
+    {   size_t length;
+        do {
+            end++;
+            length = ZDICT_count(b + pos, b + suffix[end]);
+        } while (length >= MINMATCHLENGTH);
+    }
  
      /* look backward */
-    do {
-        length = ZDICT_count(b + pos, b + *(suffix+start-1));
-        if (length >=MINMATCHLENGTH) start--;
-    } while(length >= MINMATCHLENGTH);
+    {   size_t length;
+        do {
+            length = ZDICT_count(b + pos, b + *(suffix+start-1));
+            if (length >=MINMATCHLENGTH) start--;
+        } while(length >= MINMATCHLENGTH);
+    }
  
      /* exit if not found a minimum nb of repetitions */
      if (end-start < minRatio) {
@@ -252,15 +255,15 @@ static dictItem ZDICT_analyzePos(
      }
  
      {   int i;
-        U32 searchLength;
+        U32 mml;
          U32 refinedStart = start;
          U32 refinedEnd = end;
  
          DISPLAYLEVEL(4, "\n");
-        DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u  ", (U32)(end-start), MINMATCHLENGTH, (U32)pos);
+        DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u  ", (unsigned)(end-start), MINMATCHLENGTH, (unsigned)pos);
          DISPLAYLEVEL(4, "\n");
  
-        for (searchLength = MINMATCHLENGTH ; ; searchLength++) {
+        for (mml = MINMATCHLENGTH ; ; mml++) {
              BYTE currentChar = 0;
              U32 currentCount = 0;
              U32 currentID = refinedStart;
@@ -268,13 +271,13 @@ static dictItem ZDICT_analyzePos(
              U32 selectedCount = 0;
              U32 selectedID = currentID;
              for (id =refinedStart; id < refinedEnd; id++) {
-                if (b[ suffix[id] + searchLength] != currentChar) {
+                if (b[suffix[id] + mml] != currentChar) {
                      if (currentCount > selectedCount) {
                          selectedCount = currentCount;
                          selectedID = currentID;
                      }
                      currentID = id;
-                    currentChar = b[ suffix[id] + searchLength];
+                    currentChar = b[ suffix[id] + mml];
                      currentCount = 0;
                  }
                  currentCount ++;
@@ -290,27 +293,30 @@ static dictItem ZDICT_analyzePos(
              refinedEnd = refinedStart + selectedCount;
          }
  
-        /* evaluate gain based on new ref */
+        /* evaluate gain based on new dict */
          start = refinedStart;
          pos = suffix[refinedStart];
          end = start;
          memset(lengthList, 0, sizeof(lengthList));
  
          /* look forward */
-        do {
-            end++;
-            length = ZDICT_count(b + pos, b + suffix[end]);
-            if (length >= LLIMIT) length = LLIMIT-1;
-            lengthList[length]++;
-        } while (length >=MINMATCHLENGTH);
+        {   size_t length;
+            do {
+                end++;
+                length = ZDICT_count(b + pos, b + suffix[end]);
+                if (length >= LLIMIT) length = LLIMIT-1;
+                lengthList[length]++;
+            } while (length >=MINMATCHLENGTH);
+        }
  
          /* look backward */
-        length = MINMATCHLENGTH;
-        while ((length >= MINMATCHLENGTH) & (start > 0)) {
-            length = ZDICT_count(b + pos, b + suffix[start - 1]);
-            if (length >= LLIMIT) length = LLIMIT - 1;
-            lengthList[length]++;
-            if (length >= MINMATCHLENGTH) start--;
+        {   size_t length = MINMATCHLENGTH;
+            while ((length >= MINMATCHLENGTH) & (start > 0)) {
+                length = ZDICT_count(b + pos, b + suffix[start - 1]);
+                if (length >= LLIMIT) length = LLIMIT - 1;
+                lengthList[length]++;
+                if (length >= MINMATCHLENGTH) start--;
+            }
          }
  
          /* largest useful length */
@@ -335,8 +341,8 @@ static dictItem ZDICT_analyzePos(
          for (i=MINMATCHLENGTH; i<=(int)maxLength; i++)
              savings[i] = savings[i-1] + (lengthList[i] * (i-3));
  
-        DISPLAYLEVEL(4, "Selected ref at position %u, of length %u : saves %u (ratio: %.2f)  \n",
-                     (U32)pos, (U32)maxLength, savings[maxLength], (double)savings[maxLength] / maxLength);
+        DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f)  \n",
+                     (unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength);
  
          solution.pos = (U32)pos;
          solution.length = (U32)maxLength;
@@ -345,12 +351,12 @@ static dictItem ZDICT_analyzePos(
          /* mark positions done */
          {   U32 id;
              for (id=start; id<end; id++) {
-                U32 p, pEnd;
+                U32 p, pEnd, length;
                  U32 const testedPos = suffix[id];
                  if (testedPos == pos)
                      length = solution.length;
                  else {
-                    length = ZDICT_count(b+pos, b+testedPos);
+                    length = (U32)ZDICT_count(b+pos, b+testedPos);
                      if (length > solution.length) length = solution.length;
                  }
                  pEnd = (U32)(testedPos + length);
@@ -491,7 +497,7 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
  static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
                              const void* const buffer, size_t bufferSize,   /* buffer must end with noisy guard band */
                              const size_t* fileSizes, unsigned nbFiles,
-                            U32 minRatio, U32 notificationLevel)
+                            unsigned minRatio, U32 notificationLevel)
  {
      int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
      int* const suffix = suffix0+1;
@@ -517,11 +523,11 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
      memset(doneMarks, 0, bufferSize+16);
  
      /* limit sample set size (divsufsort limitation)*/
-    if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (U32)(ZDICT_MAX_SAMPLES_SIZE>>20));
+    if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (unsigned)(ZDICT_MAX_SAMPLES_SIZE>>20));
      while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles];
  
      /* sort */
-    DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (U32)(bufferSize>>20));
+    DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (unsigned)(bufferSize>>20));
      {   int const divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
          if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; }
      }
@@ -575,29 +581,31 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
  
  typedef struct
  {
-    ZSTD_CCtx* ref;
-    ZSTD_CCtx* zc;
+    ZSTD_CDict* dict;    /* dictionary */
+    ZSTD_CCtx* zc;     /* working context */
      void* workPlace;   /* must be ZSTD_BLOCKSIZE_MAX allocated */
  } EStats_ress_t;
  
  #define MAXREPOFFSET 1024
  
  static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
-                            U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
-                            const void* src, size_t srcSize, U32 notificationLevel)
+                              unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets,
+                              const void* src, size_t srcSize,
+                              U32 notificationLevel)
  {
      size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
      size_t cSize;
  
      if (srcSize > blockSizeMax) srcSize = blockSizeMax;   /* protection vs large samples */
-    {  size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
-            if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
+    {   size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict);
+        if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
+
      }
      cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
-    if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
+    if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
  
      if (cSize) {  /* if == 0; block is not compressible */
-        const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
+        const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc);
  
          /* literals stats */
          {   const BYTE* bytePtr;
@@ -659,6 +667,18 @@ static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val,
      }
  }
  
+/* ZDICT_flatLit() :
+ * rewrite `countLit` to contain a mostly flat but still compressible distribution of literals.
+ * necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode.
+ */
+static void ZDICT_flatLit(unsigned* countLit)
+{
+    int u;
+    for (u=1; u<256; u++) countLit[u] = 2;
+    countLit[0]   = 4;
+    countLit[253] = 1;
+    countLit[254] = 1;
+}
  
  #define OFFCODE_MAX 30  /* only applicable to first block */
  static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
@@ -667,18 +687,18 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
                               const void* dictBuffer, size_t  dictBufferSize,
                                     unsigned notificationLevel)
  {
-    U32 countLit[256];
+    unsigned countLit[256];
      HUF_CREATE_STATIC_CTABLE(hufTable, 255);
-    U32 offcodeCount[OFFCODE_MAX+1];
+    unsigned offcodeCount[OFFCODE_MAX+1];
      short offcodeNCount[OFFCODE_MAX+1];
      U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB));
-    U32 matchLengthCount[MaxML+1];
+    unsigned matchLengthCount[MaxML+1];
      short matchLengthNCount[MaxML+1];
-    U32 litLengthCount[MaxLL+1];
+    unsigned litLengthCount[MaxLL+1];
      short litLengthNCount[MaxLL+1];
      U32 repOffset[MAXREPOFFSET];
      offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
-    EStats_ress_t esr;
+    EStats_ress_t esr = { NULL, NULL, NULL };
      ZSTD_parameters params;
      U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
      size_t pos = 0, errorCode;
@@ -688,14 +708,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
      BYTE* dstPtr = (BYTE*)dstBuffer;
  
      /* init */
-    esr.ref = ZSTD_createCCtx();
-    esr.zc = ZSTD_createCCtx();
-    esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
-    if (!esr.ref || !esr.zc || !esr.workPlace) {
-        eSize = ERROR(memory_allocation);
-        DISPLAYLEVEL(1, "Not enough memory \n");
-        goto _cleanup;
-    }
+    DEBUGLOG(4, "ZDICT_analyzeEntropy");
      if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; }   /* too large dictionary */
      for (u=0; u<256; u++) countLit[u] = 1;   /* any character must be described */
      for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
@@ -704,16 +717,19 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
      memset(repOffset, 0, sizeof(repOffset));
      repOffset[1] = repOffset[4] = repOffset[8] = 1;
      memset(bestRepOffset, 0, sizeof(bestRepOffset));
-    if (compressionLevel<=0) compressionLevel = g_compressionLevel_default;
+    if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
      params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
-    {   size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
-        if (ZSTD_isError(beginResult)) {
-            DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced() failed : %s \n", ZSTD_getErrorName(beginResult));
-            eSize = ERROR(GENERIC);
-            goto _cleanup;
-    }   }
  
-    /* collect stats on all files */
+    esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem);
+    esr.zc = ZSTD_createCCtx();
+    esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
+    if (!esr.dict || !esr.zc || !esr.workPlace) {
+        eSize = ERROR(memory_allocation);
+        DISPLAYLEVEL(1, "Not enough memory \n");
+        goto _cleanup;
+    }
+
+    /* collect stats on all samples */
      for (u=0; u<nbFiles; u++) {
          ZDICT_countEStats(esr, params,
                            countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
@@ -722,14 +738,21 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
          pos += fileSizes[u];
      }
  
-    /* analyze */
-    errorCode = HUF_buildCTable (hufTable, countLit, 255, huffLog);
-    if (HUF_isError(errorCode)) {
-        eSize = ERROR(GENERIC);
-        DISPLAYLEVEL(1, "HUF_buildCTable error \n");
-        goto _cleanup;
+    /* analyze, build stats, starting with literals */
+    {   size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
+        if (HUF_isError(maxNbBits)) {
+            eSize = ERROR(GENERIC);
+            DISPLAYLEVEL(1, " HUF_buildCTable error \n");
+            goto _cleanup;
+        }
+        if (maxNbBits==8) {  /* not compressible : will fail on HUF_writeCTable() */
+            DISPLAYLEVEL(2, "warning : pathological dataset : literals are not compressible : samples are noisy or too regular \n");
+            ZDICT_flatLit(countLit);  /* replace distribution by a fake "mostly flat but still compressible" distribution, that HUF_writeCTable() can encode */
+            maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
+            assert(maxNbBits==9);
+        }
+        huffLog = (U32)maxNbBits;
      }
-    huffLog = (U32)errorCode;
  
      /* looking for most common first offsets */
      {   U32 offset;
@@ -829,7 +852,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
      eSize += 12;
  
  _cleanup:
-    ZSTD_freeCCtx(esr.ref);
+    ZSTD_freeCDict(esr.dict);
      ZSTD_freeCCtx(esr.zc);
      free(esr.workPlace);
  
@@ -840,16 +863,17 @@ _cleanup:
  
  size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
                            const void* customDictContent, size_t dictContentSize,
-                          const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
-                          ZDICT_params_t params)
+                          const void* samplesBuffer, const size_t* samplesSizes,
+                          unsigned nbSamples, ZDICT_params_t params)
  {
      size_t hSize;
  #define HBUFFSIZE 256   /* should prove large enough for all entropy headers */
      BYTE header[HBUFFSIZE];
-    int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
+    int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
      U32 const notificationLevel = params.notificationLevel;
  
      /* check conditions */
+    DEBUGLOG(4, "ZDICT_finalizeDictionary");
      if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
      if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
      if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
@@ -886,11 +910,12 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
  }
  
  
-size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
-                                                 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
-                                                 ZDICT_params_t params)
+static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
+        void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
+        const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+        ZDICT_params_t params)
  {
-    int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
+    int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
      U32 const notificationLevel = params.notificationLevel;
      size_t hSize = 8;
  
@@ -919,7 +944,11 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
      return MIN(dictBufferCapacity, hSize+dictContentSize);
  }
  
-
+/* Hidden declaration for dbio.c */
+size_t ZDICT_trainFromBuffer_unsafe_legacy(
+                            void* dictBuffer, size_t maxDictSize,
+                            const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                            ZDICT_legacy_params_t params);
  /*! ZDICT_trainFromBuffer_unsafe_legacy() :
  *   Warning : `samplesBuffer` must be followed by noisy guard band.
  *   @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
@@ -954,31 +983,33 @@ size_t ZDICT_trainFromBuffer_unsafe_legacy(
  
      /* display best matches */
      if (params.zParams.notificationLevel>= 3) {
-        U32 const nb = MIN(25, dictList[0].pos);
-        U32 const dictContentSize = ZDICT_dictSize(dictList);
-        U32 u;
-        DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos-1, dictContentSize);
+        unsigned const nb = MIN(25, dictList[0].pos);
+        unsigned const dictContentSize = ZDICT_dictSize(dictList);
+        unsigned u;
+        DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", (unsigned)dictList[0].pos-1, dictContentSize);
          DISPLAYLEVEL(3, "list %u best segments \n", nb-1);
          for (u=1; u<nb; u++) {
-            U32 const pos = dictList[u].pos;
-            U32 const length = dictList[u].length;
+            unsigned const pos = dictList[u].pos;
+            unsigned const length = dictList[u].length;
              U32 const printedLength = MIN(40, length);
-            if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize))
+            if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize)) {
+                free(dictList);
                  return ERROR(GENERIC);   /* should never happen */
+            }
              DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
-                         u, length, pos, dictList[u].savings);
+                         u, length, pos, (unsigned)dictList[u].savings);
              ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
              DISPLAYLEVEL(3, "| \n");
      }   }
  
  
      /* create dictionary */
-    {   U32 dictContentSize = ZDICT_dictSize(dictList);
+    {   unsigned dictContentSize = ZDICT_dictSize(dictList);
          if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); }   /* dictionary content too small */
          if (dictContentSize < targetDictSize/4) {
-            DISPLAYLEVEL(2, "!  warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
+            DISPLAYLEVEL(2, "!  warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (unsigned)maxDictSize);
              if (samplesBuffSize < 10 * targetDictSize)
-                DISPLAYLEVEL(2, "!  consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
+                DISPLAYLEVEL(2, "!  consider increasing the number of samples (total size : %u MB)\n", (unsigned)(samplesBuffSize>>20));
              if (minRep > MINRATIO) {
                  DISPLAYLEVEL(2, "!  consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
                  DISPLAYLEVEL(2, "!  note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
@@ -986,9 +1017,9 @@ size_t ZDICT_trainFromBuffer_unsafe_legacy(
          }
  
          if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
-            U32 proposedSelectivity = selectivity-1;
+            unsigned proposedSelectivity = selectivity-1;
              while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
-            DISPLAYLEVEL(2, "!  note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize);
+            DISPLAYLEVEL(2, "!  note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (unsigned)maxDictSize);
              DISPLAYLEVEL(2, "!  consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
              DISPLAYLEVEL(2, "!  always test dictionary efficiency on real samples \n");
          }
@@ -1025,8 +1056,9 @@ size_t ZDICT_trainFromBuffer_unsafe_legacy(
  }
  
  
-/* issue : samplesBuffer need to be followed by a noisy guard band.
-*  work around : duplicate the buffer, and add the noise */
+/* ZDICT_trainFromBuffer_legacy() :
+ * issue : samplesBuffer need to be followed by a noisy guard band.
+ * work around : duplicate the buffer, and add the noise */
  size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
                                const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                                ZDICT_legacy_params_t params)
@@ -1053,19 +1085,23 @@ size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
  size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
                               const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
  {
-    ZDICT_cover_params_t params;
+    ZDICT_fastCover_params_t params;
+    DEBUGLOG(3, "ZDICT_trainFromBuffer");
      memset(&params, 0, sizeof(params));
      params.d = 8;
      params.steps = 4;
-    /* Default to level 6 since no compression level information is avaialble */
-    params.zParams.compressionLevel = 6;
-    return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
-                                               samplesBuffer, samplesSizes,
-                                               nbSamples, &params);
+    /* Default to level 6 since no compression level information is available */
+    params.zParams.compressionLevel = 3;
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1)
+    params.zParams.notificationLevel = DEBUGLEVEL;
+#endif
+    return ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, dictBufferCapacity,
+                                               samplesBuffer, samplesSizes, nbSamples,
+                                               &params);
  }
  
  size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
-                                        const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
+                                  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
  {
      ZDICT_params_t params;
      memset(&params, 0, sizeof(params));