+++ /dev/null
-/* Copyright 2013 Google Inc. All Rights Reserved.\r
-\r
- Distributed under MIT license.\r
- See file LICENSE for detail or copy at https://opensource.org/licenses/MIT\r
-*/\r
-\r
-/* Transformations on dictionary words. */\r
-\r
-#ifndef BROTLI_DEC_TRANSFORM_H_\r
-#define BROTLI_DEC_TRANSFORM_H_\r
-\r
-#include "../common/types.h"\r
-#include "./port.h"\r
-\r
-#if defined(__cplusplus) || defined(c_plusplus)\r
-extern "C" {\r
-#endif\r
-\r
-enum WordTransformType { /* Operation applied to the dictionary word itself,\r
-   between the prefix and the suffix of a transform. The numeric values are\r
-   load-bearing: TransformDictionaryWord subtracts kOmitLastN directly from\r
-   the word length, and derives the number of leading bytes to skip as\r
-   (kOmitFirstN - (kOmitFirst1 - 1)), so both runs must stay contiguous and\r
-   in this order. */\r
- kIdentity = 0,\r
- kOmitLast1 = 1,\r
- kOmitLast2 = 2,\r
- kOmitLast3 = 3,\r
- kOmitLast4 = 4,\r
- kOmitLast5 = 5,\r
- kOmitLast6 = 6,\r
- kOmitLast7 = 7,\r
- kOmitLast8 = 8,\r
- kOmitLast9 = 9,\r
- kUppercaseFirst = 10,\r
- kUppercaseAll = 11,\r
- kOmitFirst1 = 12,\r
- kOmitFirst2 = 13,\r
- kOmitFirst3 = 14,\r
- kOmitFirst4 = 15,\r
- kOmitFirst5 = 16,\r
- kOmitFirst6 = 17,\r
- kOmitFirst7 = 18,\r
- kOmitFirst8 = 19,\r
- kOmitFirst9 = 20\r
-};\r
-\r
-typedef struct { /* One dictionary-word transform: a prefix string, an\r
-   operation on the word, and a suffix string. */\r
- const uint8_t prefix_id; /* byte offset of the prefix string in kPrefixSuffix */\r
- const uint8_t transform; /* word operation; compared against WordTransformType values */\r
- const uint8_t suffix_id; /* byte offset of the suffix string in kPrefixSuffix */\r
-} Transform;\r
-\r
-static const char kPrefixSuffix[208] = /* Every prefix/suffix string, packed\r
-   back to back and each terminated by a zero byte. Strings are addressed by\r
-   byte offset (the kPFix_* constants, stored in Transform.prefix_id and\r
-   Transform.suffix_id), so editing any string shifts every offset after it.\r
-   The array size of 208 includes the terminating zero of the last string. */\r
- "\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0"\r
- " for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0"\r
- " is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. This \0"\r
- " not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous ";\r
-\r
-enum {\r
- /* Byte offsets of the individual strings inside kPrefixSuffix. Each name\r
-    spells out the string found at that offset. Some offsets point into the\r
-    tail of a longer string instead of owning their own copy: kPFix_SPtheSP\r
-    and kPFix_eSP both lie inside " of the ", which starts at offset 6.\r
-\r
-    Legend for the name fragments:\r
-    EMPTY = ""\r
-    SP = " "\r
-    DQUOT = "\""\r
-    SQUOT = "'"\r
-    CLOSEBR = "]"\r
-    OPEN = "("\r
-    SLASH = "/"\r
-    NBSP = non-breaking space "\xc2\xa0"\r
- */\r
- kPFix_EMPTY = 0,\r
- kPFix_SP = 1,\r
- kPFix_COMMASP = 3,\r
- kPFix_SPofSPtheSP = 6,\r
- kPFix_SPtheSP = 9,\r
- kPFix_eSP = 12,\r
- kPFix_SPofSP = 15,\r
- kPFix_sSP = 20,\r
- kPFix_DOT = 23,\r
- kPFix_SPandSP = 25,\r
- kPFix_SPinSP = 31,\r
- kPFix_DQUOT = 36,\r
- kPFix_SPtoSP = 38,\r
- kPFix_DQUOTGT = 43,\r
- kPFix_NEWLINE = 46,\r
- kPFix_DOTSP = 48,\r
- kPFix_CLOSEBR = 51,\r
- kPFix_SPforSP = 53,\r
- kPFix_SPaSP = 59,\r
- kPFix_SPthatSP = 63,\r
- kPFix_SQUOT = 70,\r
- kPFix_SPwithSP = 72,\r
- kPFix_SPfromSP = 79,\r
- kPFix_SPbySP = 86,\r
- kPFix_OPEN = 91,\r
- kPFix_DOTSPTheSP = 93,\r
- kPFix_SPonSP = 100,\r
- kPFix_SPasSP = 105,\r
- kPFix_SPisSP = 110,\r
- kPFix_ingSP = 115,\r
- kPFix_NEWLINETAB = 120,\r
- kPFix_COLON = 123,\r
- kPFix_edSP = 125,\r
- kPFix_EQDQUOT = 129,\r
- kPFix_SPatSP = 132,\r
- kPFix_lySP = 137,\r
- kPFix_COMMA = 141,\r
- kPFix_EQSQUOT = 143,\r
- kPFix_DOTcomSLASH = 146,\r
- kPFix_DOTSPThisSP = 152,\r
- kPFix_SPnotSP = 160,\r
- kPFix_erSP = 166,\r
- kPFix_alSP = 170,\r
- kPFix_fulSP = 174,\r
- kPFix_iveSP = 179,\r
- kPFix_lessSP = 184,\r
- kPFix_estSP = 190,\r
- kPFix_izeSP = 195,\r
- kPFix_NBSP = 200,\r
- kPFix_ousSP = 203\r
-};\r
-\r
-static const Transform kTransforms[] = { /* The transform table. Each entry is\r
-   { prefix offset, word operation, suffix offset }, where both offsets index\r
-   kPrefixSuffix. TransformDictionaryWord looks entries up by position, so\r
-   the position of an entry is its transform id and reordering the table\r
-   changes what every id means. */\r
- { kPFix_EMPTY, kIdentity, kPFix_EMPTY },\r
- { kPFix_EMPTY, kIdentity, kPFix_SP },\r
- { kPFix_SP, kIdentity, kPFix_SP },\r
- { kPFix_EMPTY, kOmitFirst1, kPFix_EMPTY },\r
- { kPFix_EMPTY, kUppercaseFirst, kPFix_SP },\r
- { kPFix_EMPTY, kIdentity, kPFix_SPtheSP },\r
- { kPFix_SP, kIdentity, kPFix_EMPTY },\r
- { kPFix_sSP, kIdentity, kPFix_SP },\r
- { kPFix_EMPTY, kIdentity, kPFix_SPofSP },\r
- { kPFix_EMPTY, kUppercaseFirst, kPFix_EMPTY },\r
- { kPFix_EMPTY, kIdentity, kPFix_SPandSP },\r
- { kPFix_EMPTY, kOmitFirst2, kPFix_EMPTY },\r
- { kPFix_EMPTY, kOmitLast1, kPFix_EMPTY },\r
- { kPFix_COMMASP, kIdentity, kPFix_SP },\r
- { kPFix_EMPTY, kIdentity, kPFix_COMMASP },\r
- { kPFix_SP, kUppercaseFirst, kPFix_SP },\r
- { kPFix_EMPTY, kIdentity, kPFix_SPinSP },\r
- { kPFix_EMPTY, kIdentity, kPFix_SPtoSP },\r
- { kPFix_eSP, kIdentity, kPFix_SP },\r
- { kPFix_EMPTY, kIdentity, kPFix_DQUOT },\r
- { kPFix_EMPTY, kIdentity, kPFix_DOT },\r
- { kPFix_EMPTY, kIdentity, kPFix_DQUOTGT },\r
- { kPFix_EMPTY, kIdentity, kPFix_NEWLINE },\r
- { kPFix_EMPTY, kOmitLast3, kPFix_EMPTY },\r
- { kPFix_EMPTY, kIdentity, kPFix_CLOSEBR },\r
- { kPFix_EMPTY, kIdentity, kPFix_SPforSP },\r
- { kPFix_EMPTY, kOmitFirst3, kPFix_EMPTY },\r
- { kPFix_EMPTY, kOmitLast2, kPFix_EMPTY },\r
- { kPFix_EMPTY, kIdentity, kPFix_SPaSP },\r
- { kPFix_EMPTY, kIdentity, kPFix_SPthatSP },\r
- { kPFix_SP, kUppercaseFirst, kPFix_EMPTY },\r
- { kPFix_EMPTY, kIdentity, kPFix_DOTSP },\r
- { kPFix_DOT, kIdentity, kPFix_EMPTY },\r
- { kPFix_SP, kIdentity, kPFix_COMMASP },\r
- { kPFix_EMPTY, kOmitFirst4, kPFix_EMPTY },\r
- { kPFix_EMPTY, kIdentity, kPFix_SPwithSP },\r
- { kPFix_EMPTY, kIdentity, kPFix_SQUOT },\r
- { kPFix_EMPTY, kIdentity, kPFix_SPfromSP },\r
- { kPFix_EMPTY, kIdentity, kPFix_SPbySP },\r
- { kPFix_EMPTY, kOmitFirst5, kPFix_EMPTY },\r
- { kPFix_EMPTY, kOmitFirst6, kPFix_EMPTY },\r
- { kPFix_SPtheSP, kIdentity, kPFix_EMPTY },\r
- { kPFix_EMPTY, kOmitLast4, kPFix_EMPTY },\r
- { kPFix_EMPTY, kIdentity, kPFix_DOTSPTheSP },\r
- { kPFix_EMPTY, kUppercaseAll, kPFix_EMPTY },\r
- { kPFix_EMPTY, kIdentity, kPFix_SPonSP },\r
- { kPFix_EMPTY, kIdentity, kPFix_SPasSP },\r
- { kPFix_EMPTY, kIdentity, kPFix_SPisSP },\r
- { kPFix_EMPTY, kOmitLast7, kPFix_EMPTY },\r
- { kPFix_EMPTY, kOmitLast1, kPFix_ingSP },\r
- { kPFix_EMPTY, kIdentity, kPFix_NEWLINETAB },\r
- { kPFix_EMPTY, kIdentity, kPFix_COLON },\r
- { kPFix_SP, kIdentity, kPFix_DOTSP },\r
- { kPFix_EMPTY, kIdentity, kPFix_edSP },\r
- { kPFix_EMPTY, kOmitFirst9, kPFix_EMPTY },\r
- { kPFix_EMPTY, kOmitFirst7, kPFix_EMPTY },\r
- { kPFix_EMPTY, kOmitLast6, kPFix_EMPTY },\r
- { kPFix_EMPTY, kIdentity, kPFix_OPEN },\r
- { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMASP },\r
- { kPFix_EMPTY, kOmitLast8, kPFix_EMPTY },\r
- { kPFix_EMPTY, kIdentity, kPFix_SPatSP },\r
- { kPFix_EMPTY, kIdentity, kPFix_lySP },\r
- { kPFix_SPtheSP, kIdentity, kPFix_SPofSP },\r
- { kPFix_EMPTY, kOmitLast5, kPFix_EMPTY },\r
- { kPFix_EMPTY, kOmitLast9, kPFix_EMPTY },\r
- { kPFix_SP, kUppercaseFirst, kPFix_COMMASP },\r
- { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOT },\r
- { kPFix_DOT, kIdentity, kPFix_OPEN },\r
- { kPFix_EMPTY, kUppercaseAll, kPFix_SP },\r
- { kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOTGT },\r
- { kPFix_EMPTY, kIdentity, kPFix_EQDQUOT },\r
- { kPFix_SP, kIdentity, kPFix_DOT },\r
- { kPFix_DOTcomSLASH, kIdentity, kPFix_EMPTY },\r
- { kPFix_SPtheSP, kIdentity, kPFix_SPofSPtheSP },\r
- { kPFix_EMPTY, kUppercaseFirst, kPFix_SQUOT },\r
- { kPFix_EMPTY, kIdentity, kPFix_DOTSPThisSP },\r
- { kPFix_EMPTY, kIdentity, kPFix_COMMA },\r
- { kPFix_DOT, kIdentity, kPFix_SP },\r
- { kPFix_EMPTY, kUppercaseFirst, kPFix_OPEN },\r
- { kPFix_EMPTY, kUppercaseFirst, kPFix_DOT },\r
- { kPFix_EMPTY, kIdentity, kPFix_SPnotSP },\r
- { kPFix_SP, kIdentity, kPFix_EQDQUOT },\r
- { kPFix_EMPTY, kIdentity, kPFix_erSP },\r
- { kPFix_SP, kUppercaseAll, kPFix_SP },\r
- { kPFix_EMPTY, kIdentity, kPFix_alSP },\r
- { kPFix_SP, kUppercaseAll, kPFix_EMPTY },\r
- { kPFix_EMPTY, kIdentity, kPFix_EQSQUOT },\r
- { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOT },\r
- { kPFix_EMPTY, kUppercaseFirst, kPFix_DOTSP },\r
- { kPFix_SP, kIdentity, kPFix_OPEN },\r
- { kPFix_EMPTY, kIdentity, kPFix_fulSP },\r
- { kPFix_SP, kUppercaseFirst, kPFix_DOTSP },\r
- { kPFix_EMPTY, kIdentity, kPFix_iveSP },\r
- { kPFix_EMPTY, kIdentity, kPFix_lessSP },\r
- { kPFix_EMPTY, kUppercaseAll, kPFix_SQUOT },\r
- { kPFix_EMPTY, kIdentity, kPFix_estSP },\r
- { kPFix_SP, kUppercaseFirst, kPFix_DOT },\r
- { kPFix_EMPTY, kUppercaseAll, kPFix_DQUOTGT },\r
- { kPFix_SP, kIdentity, kPFix_EQSQUOT },\r
- { kPFix_EMPTY, kUppercaseFirst, kPFix_COMMA },\r
- { kPFix_EMPTY, kIdentity, kPFix_izeSP },\r
- { kPFix_EMPTY, kUppercaseAll, kPFix_DOT },\r
- { kPFix_NBSP, kIdentity, kPFix_EMPTY },\r
- { kPFix_SP, kIdentity, kPFix_COMMA },\r
- { kPFix_EMPTY, kUppercaseFirst, kPFix_EQDQUOT },\r
- { kPFix_EMPTY, kUppercaseAll, kPFix_EQDQUOT },\r
- { kPFix_EMPTY, kIdentity, kPFix_ousSP },\r
- { kPFix_EMPTY, kUppercaseAll, kPFix_COMMASP },\r
- { kPFix_EMPTY, kUppercaseFirst, kPFix_EQSQUOT },\r
- { kPFix_SP, kUppercaseFirst, kPFix_COMMA },\r
- { kPFix_SP, kUppercaseAll, kPFix_EQDQUOT },\r
- { kPFix_SP, kUppercaseAll, kPFix_COMMASP },\r
- { kPFix_EMPTY, kUppercaseAll, kPFix_COMMA },\r
- { kPFix_EMPTY, kUppercaseAll, kPFix_OPEN },\r
- { kPFix_EMPTY, kUppercaseAll, kPFix_DOTSP },\r
- { kPFix_SP, kUppercaseAll, kPFix_DOT },\r
- { kPFix_EMPTY, kUppercaseAll, kPFix_EQSQUOT },\r
- { kPFix_SP, kUppercaseAll, kPFix_DOTSP },\r
- { kPFix_SP, kUppercaseFirst, kPFix_EQDQUOT },\r
- { kPFix_SP, kUppercaseAll, kPFix_EQSQUOT },\r
- { kPFix_SP, kUppercaseFirst, kPFix_EQSQUOT },\r
-};\r
-\r
-static const int kNumTransforms = sizeof(kTransforms) / sizeof(kTransforms[0]); /* entry count, computed from the table so the two cannot drift apart */\r
-\r
-static int ToUpperCase(uint8_t* p) { /* Applies the uppercase transform, in\r
-   place, to the character that starts at p, and returns how many bytes that\r
-   character is taken to occupy (1, 2 or 3). The width is decided from the\r
-   lead byte p[0] alone; no length is passed in, so p[1] is written whenever\r
-   p[0] is in 0xc0..0xdf and p[2] whenever p[0] >= 0xe0. The caller has to\r
-   make sure those bytes are writable. */\r
- if (p[0] < 0xc0) {\r
- if (p[0] >= 'a' && p[0] <= 'z') {\r
- p[0] ^= 32; /* ASCII lower and upper case letters differ only in bit 0x20 */\r
- }\r
- return 1; /* any other byte below 0xc0 is left untouched but still consumed */\r
- }\r
- /* An overly simplified uppercasing model for utf-8. */\r
- if (p[0] < 0xe0) {\r
- p[1] ^= 32; /* toggles bit 0x20 of the second byte; the lead byte is unchanged */\r
- return 2;\r
- }\r
- /* An arbitrary transform for three byte characters. */\r
- p[2] ^= 5; /* reached for every lead byte >= 0xe0, so 0xf0 and above also count as 3 bytes */\r
- return 3;\r
-}\r
-\r
-static BROTLI_NOINLINE int TransformDictionaryWord(\r
- uint8_t* dst, const uint8_t* word, int len, int transform) { /* Expands one\r
-   dictionary word: writes prefix + modified word + suffix, as described by\r
-   kTransforms[transform], to dst starting at dst[0].\r
-   dst: output buffer. Its size is not checked here and no terminating zero\r
-   is written.\r
-   word, len: the source word and its length in bytes.\r
-   transform: index into kTransforms. It is not range-checked here.\r
-   Returns the number of bytes written to dst.\r
-   NOTE(review): callers presumably guarantee transform < kNumTransforms and\r
-   enough room in dst for prefix + word + suffix -- confirm at call sites. */\r
- int idx = 0;\r
- {\r
- const char* prefix = &kPrefixSuffix[kTransforms[transform].prefix_id];\r
- while (*prefix) { dst[idx++] = (uint8_t)*prefix++; } /* copy up to, not including, the zero byte */\r
- }\r
- {\r
- const int t = kTransforms[transform].transform;\r
- int i = 0;\r
- int skip = t - (kOmitFirst1 - 1); /* N for kOmitFirstN, zero or negative for every other type */\r
- if (skip > 0) {\r
- word += skip;\r
- len -= skip;\r
- } else if (t <= kOmitLast9) {\r
- len -= t; /* kIdentity..kOmitLast9 are 0..9, which is exactly the number of trailing bytes to drop */\r
- }\r
- while (i < len) { dst[idx++] = word[i++]; } /* copies nothing when the omit left len <= 0 */\r
- if (t == kUppercaseFirst) {\r
- ToUpperCase(&dst[idx - len]); /* first byte of the copied word; the returned width is not needed */\r
- } else if (t == kUppercaseAll) {\r
- uint8_t* uppercase = &dst[idx - len];\r
- while (len > 0) { /* from here on len counts the copied bytes still to visit */\r
- int step = ToUpperCase(uppercase);\r
- uppercase += step;\r
- len -= step; /* NOTE(review): a 2- or 3-byte step taken near the end of the word makes ToUpperCase write past the copied bytes -- confirm dst has slack */\r
- }\r
- }\r
- }\r
- {\r
- const char* suffix = &kPrefixSuffix[kTransforms[transform].suffix_id];\r
- while (*suffix) { dst[idx++] = (uint8_t)*suffix++; }\r
- return idx;\r
- }\r
-}\r
-\r
-#if defined(__cplusplus) || defined(c_plusplus)\r
-} /* extern "C" */\r
-#endif\r
-\r
-#endif /* BROTLI_DEC_TRANSFORM_H_ */\r