transform.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. /* Copyright 2013 Google Inc. All Rights Reserved.
  2. Distributed under MIT license.
  3. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
  4. */
  5. #include "./transform.h"
  6. #if defined(__cplusplus) || defined(c_plusplus)
  7. extern "C" {
  8. #endif
  9. /* RFC 7932 transforms string data */
  /* Packed affix strings. Every entry is a single length byte followed by that
     many payload bytes, and entries are stored back to back. The tag beside
     each entry gives its ordinal and the offset of its length byte; those
     offsets are the values listed in kPrefixSuffixMap. The array is sized 217
     so that the string literal's implicit terminating zero is kept as data: it
     forms the final, zero-length entry. */
  static const char kPrefixSuffix[217] =
      /*  0 @ 0x00 */ "\1 "
      /*  1 @ 0x02 */ "\2, "
      /*  2 @ 0x05 */ "\10 of the "
      /*  3 @ 0x0E */ "\4 of "
      /*  4 @ 0x13 */ "\2s "
      /*  5 @ 0x16 */ "\1."
      /*  6 @ 0x18 */ "\5 and "
      /*  7 @ 0x1E */ "\4 in "
      /*  8 @ 0x23 */ "\1\""
      /*  9 @ 0x25 */ "\4 to "
      /* 10 @ 0x2A */ "\2\">"
      /* 11 @ 0x2D */ "\1\n"
      /* 12 @ 0x2F */ "\2. "
      /* 13 @ 0x32 */ "\1]"
      /* 14 @ 0x34 */ "\5 for "
      /* 15 @ 0x3A */ "\3 a "
      /* 16 @ 0x3E */ "\6 that "
      /* 17 @ 0x45 */ "\1\'"
      /* 18 @ 0x47 */ "\6 with "
      /* 19 @ 0x4E */ "\6 from "
      /* 20 @ 0x55 */ "\4 by "
      /* 21 @ 0x5A */ "\1("
      /* 22 @ 0x5C */ "\6. The "
      /* 23 @ 0x63 */ "\4 on "
      /* 24 @ 0x68 */ "\4 as "
      /* 25 @ 0x6D */ "\4 is "
      /* 26 @ 0x72 */ "\4ing "
      /* 27 @ 0x77 */ "\2\n\t"
      /* 28 @ 0x7A */ "\1:"
      /* 29 @ 0x7C */ "\3ed "
      /* 30 @ 0x80 */ "\2=\""
      /* 31 @ 0x83 */ "\4 at "
      /* 32 @ 0x88 */ "\3ly "
      /* 33 @ 0x8C */ "\1,"
      /* 34 @ 0x8E */ "\2=\'"
      /* 35 @ 0x91 */ "\5.com/"
      /* 36 @ 0x97 */ "\7. This "
      /* 37 @ 0x9F */ "\5 not "
      /* 38 @ 0xA5 */ "\3er "
      /* 39 @ 0xA9 */ "\3al "
      /* 40 @ 0xAD */ "\4ful "
      /* 41 @ 0xB2 */ "\4ive "
      /* 42 @ 0xB7 */ "\5less "
      /* 43 @ 0xBD */ "\4est "
      /* 44 @ 0xC2 */ "\4ize "
      /* 45 @ 0xC7 */ "\2\xc2\xa0"
      /* 46 @ 0xCA */ "\4ous "
      /* 47 @ 0xCF */ "\5 the "
      /* 48 @ 0xD5 */ "\2e ";
      /* 49 @ 0xD8 is the implicit trailing zero: an empty entry. */
  /* Offset of each affix's length byte inside kPrefixSuffix, listed in affix
     order. There are 50 entries; the last one (0xD8) lands on the blob's
     terminating zero byte, i.e. on a zero-length affix. */
  static const uint16_t kPrefixSuffixMap[50] = {
    /*  0.. 4 */ 0x00, 0x02, 0x05, 0x0E, 0x13,
    /*  5.. 9 */ 0x16, 0x18, 0x1E, 0x23, 0x25,
    /* 10..14 */ 0x2A, 0x2D, 0x2F, 0x32, 0x34,
    /* 15..19 */ 0x3A, 0x3E, 0x45, 0x47, 0x4E,
    /* 20..24 */ 0x55, 0x5A, 0x5C, 0x63, 0x68,
    /* 25..29 */ 0x6D, 0x72, 0x77, 0x7A, 0x7C,
    /* 30..34 */ 0x80, 0x83, 0x88, 0x8C, 0x8E,
    /* 35..39 */ 0x91, 0x97, 0x9F, 0xA5, 0xA9,
    /* 40..44 */ 0xAD, 0xB2, 0xB7, 0xBD, 0xC2,
    /* 45..49 */ 0xC7, 0xCA, 0xCF, 0xD5, 0xD8
  };
  32. /* RFC 7932 transforms */
  33. static const uint8_t kTransformsData[] = {
  34. 49, BROTLI_TRANSFORM_IDENTITY, 49,
  35. 49, BROTLI_TRANSFORM_IDENTITY, 0,
  36. 0, BROTLI_TRANSFORM_IDENTITY, 0,
  37. 49, BROTLI_TRANSFORM_OMIT_FIRST_1, 49,
  38. 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
  39. 49, BROTLI_TRANSFORM_IDENTITY, 47,
  40. 0, BROTLI_TRANSFORM_IDENTITY, 49,
  41. 4, BROTLI_TRANSFORM_IDENTITY, 0,
  42. 49, BROTLI_TRANSFORM_IDENTITY, 3,
  43. 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
  44. 49, BROTLI_TRANSFORM_IDENTITY, 6,
  45. 49, BROTLI_TRANSFORM_OMIT_FIRST_2, 49,
  46. 49, BROTLI_TRANSFORM_OMIT_LAST_1, 49,
  47. 1, BROTLI_TRANSFORM_IDENTITY, 0,
  48. 49, BROTLI_TRANSFORM_IDENTITY, 1,
  49. 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 0,
  50. 49, BROTLI_TRANSFORM_IDENTITY, 7,
  51. 49, BROTLI_TRANSFORM_IDENTITY, 9,
  52. 48, BROTLI_TRANSFORM_IDENTITY, 0,
  53. 49, BROTLI_TRANSFORM_IDENTITY, 8,
  54. 49, BROTLI_TRANSFORM_IDENTITY, 5,
  55. 49, BROTLI_TRANSFORM_IDENTITY, 10,
  56. 49, BROTLI_TRANSFORM_IDENTITY, 11,
  57. 49, BROTLI_TRANSFORM_OMIT_LAST_3, 49,
  58. 49, BROTLI_TRANSFORM_IDENTITY, 13,
  59. 49, BROTLI_TRANSFORM_IDENTITY, 14,
  60. 49, BROTLI_TRANSFORM_OMIT_FIRST_3, 49,
  61. 49, BROTLI_TRANSFORM_OMIT_LAST_2, 49,
  62. 49, BROTLI_TRANSFORM_IDENTITY, 15,
  63. 49, BROTLI_TRANSFORM_IDENTITY, 16,
  64. 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 49,
  65. 49, BROTLI_TRANSFORM_IDENTITY, 12,
  66. 5, BROTLI_TRANSFORM_IDENTITY, 49,
  67. 0, BROTLI_TRANSFORM_IDENTITY, 1,
  68. 49, BROTLI_TRANSFORM_OMIT_FIRST_4, 49,
  69. 49, BROTLI_TRANSFORM_IDENTITY, 18,
  70. 49, BROTLI_TRANSFORM_IDENTITY, 17,
  71. 49, BROTLI_TRANSFORM_IDENTITY, 19,
  72. 49, BROTLI_TRANSFORM_IDENTITY, 20,
  73. 49, BROTLI_TRANSFORM_OMIT_FIRST_5, 49,
  74. 49, BROTLI_TRANSFORM_OMIT_FIRST_6, 49,
  75. 47, BROTLI_TRANSFORM_IDENTITY, 49,
  76. 49, BROTLI_TRANSFORM_OMIT_LAST_4, 49,
  77. 49, BROTLI_TRANSFORM_IDENTITY, 22,
  78. 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
  79. 49, BROTLI_TRANSFORM_IDENTITY, 23,
  80. 49, BROTLI_TRANSFORM_IDENTITY, 24,
  81. 49, BROTLI_TRANSFORM_IDENTITY, 25,
  82. 49, BROTLI_TRANSFORM_OMIT_LAST_7, 49,
  83. 49, BROTLI_TRANSFORM_OMIT_LAST_1, 26,
  84. 49, BROTLI_TRANSFORM_IDENTITY, 27,
  85. 49, BROTLI_TRANSFORM_IDENTITY, 28,
  86. 0, BROTLI_TRANSFORM_IDENTITY, 12,
  87. 49, BROTLI_TRANSFORM_IDENTITY, 29,
  88. 49, BROTLI_TRANSFORM_OMIT_FIRST_9, 49,
  89. 49, BROTLI_TRANSFORM_OMIT_FIRST_7, 49,
  90. 49, BROTLI_TRANSFORM_OMIT_LAST_6, 49,
  91. 49, BROTLI_TRANSFORM_IDENTITY, 21,
  92. 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
  93. 49, BROTLI_TRANSFORM_OMIT_LAST_8, 49,
  94. 49, BROTLI_TRANSFORM_IDENTITY, 31,
  95. 49, BROTLI_TRANSFORM_IDENTITY, 32,
  96. 47, BROTLI_TRANSFORM_IDENTITY, 3,
  97. 49, BROTLI_TRANSFORM_OMIT_LAST_5, 49,
  98. 49, BROTLI_TRANSFORM_OMIT_LAST_9, 49,
  99. 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 1,
  100. 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 8,
  101. 5, BROTLI_TRANSFORM_IDENTITY, 21,
  102. 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
  103. 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 10,
  104. 49, BROTLI_TRANSFORM_IDENTITY, 30,
  105. 0, BROTLI_TRANSFORM_IDENTITY, 5,
  106. 35, BROTLI_TRANSFORM_IDENTITY, 49,
  107. 47, BROTLI_TRANSFORM_IDENTITY, 2,
  108. 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 17,
  109. 49, BROTLI_TRANSFORM_IDENTITY, 36,
  110. 49, BROTLI_TRANSFORM_IDENTITY, 33,
  111. 5, BROTLI_TRANSFORM_IDENTITY, 0,
  112. 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 21,
  113. 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
  114. 49, BROTLI_TRANSFORM_IDENTITY, 37,
  115. 0, BROTLI_TRANSFORM_IDENTITY, 30,
  116. 49, BROTLI_TRANSFORM_IDENTITY, 38,
  117. 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 0,
  118. 49, BROTLI_TRANSFORM_IDENTITY, 39,
  119. 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 49,
  120. 49, BROTLI_TRANSFORM_IDENTITY, 34,
  121. 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 8,
  122. 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
  123. 0, BROTLI_TRANSFORM_IDENTITY, 21,
  124. 49, BROTLI_TRANSFORM_IDENTITY, 40,
  125. 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 12,
  126. 49, BROTLI_TRANSFORM_IDENTITY, 41,
  127. 49, BROTLI_TRANSFORM_IDENTITY, 42,
  128. 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 17,
  129. 49, BROTLI_TRANSFORM_IDENTITY, 43,
  130. 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 5,
  131. 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 10,
  132. 0, BROTLI_TRANSFORM_IDENTITY, 34,
  133. 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
  134. 49, BROTLI_TRANSFORM_IDENTITY, 44,
  135. 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
  136. 45, BROTLI_TRANSFORM_IDENTITY, 49,
  137. 0, BROTLI_TRANSFORM_IDENTITY, 33,
  138. 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
  139. 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
  140. 49, BROTLI_TRANSFORM_IDENTITY, 46,
  141. 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
  142. 49, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
  143. 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 33,
  144. 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 30,
  145. 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 1,
  146. 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 33,
  147. 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 21,
  148. 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
  149. 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 5,
  150. 49, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
  151. 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 12,
  152. 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 30,
  153. 0, BROTLI_TRANSFORM_UPPERCASE_ALL, 34,
  154. 0, BROTLI_TRANSFORM_UPPERCASE_FIRST, 34,
  155. };
  156. static const BrotliTransforms kBrotliTransforms = {
  157. sizeof(kPrefixSuffix),
  158. (const uint8_t*)kPrefixSuffix,
  159. kPrefixSuffixMap,
  160. sizeof(kTransformsData) / (3 * sizeof(kTransformsData[0])),
  161. kTransformsData,
  162. NULL, /* no extra parameters */
  163. {0, 12, 27, 23, 42, 63, 56, 48, 59, 64}
  164. };
  165. const BrotliTransforms* BrotliGetTransforms(void) {
  166. return &kBrotliTransforms;
  167. }
  /* Applies a deliberately crude "uppercase" step to the symbol that starts at
     p, in place, and returns how many bytes that symbol is taken to span.

     The span is decided by the lead byte alone:
       - below 0xC0:  1 byte; 'a'..'z' is flipped to 'A'..'Z', anything else is
                      left untouched;
       - 0xC0..0xDF:  2 bytes; the second byte is XOR-ed with 32;
       - 0xE0 and up: 3 bytes; the third byte is XOR-ed with 5.

     No length is passed in, so the caller has to make sure that p[1] / p[2]
     are accessible whenever the lead byte selects a multi-byte span. */
  static int ToUpperCase(uint8_t* p) {
    const uint8_t lead = p[0];

    if (lead >= 0xE0) {
      /* An arbitrary transform for three byte characters. */
      p[2] ^= 5;
      return 3;
    }
    if (lead >= 0xC0) {
      /* An overly simplified uppercasing model for UTF-8. */
      p[1] ^= 32;
      return 2;
    }
    if ('a' <= lead && lead <= 'z') {
      p[0] = (uint8_t)(lead ^ 32);
    }
    return 1;
  }
  /* Adds a signed 16-bit offset to the scalar value of the UTF-8-style rune
     that starts at word[0], rewriting the rune in place.

     word      - first byte of the rune; modified in place.
     word_len  - number of bytes available starting at word.
     parameter - the offset; bit 15 is treated as the sign bit.

     Returns the number of bytes the caller should advance by. A continuation
     byte (0x80..0xBF) or a lead byte of 0xF8 and above is skipped unchanged
     with a return of 1. When the rune needs more bytes than word_len provides
     nothing is modified; the return is then 1 for a 2-byte lead and word_len
     for 3- and 4-byte leads.

     Only the scalar bits are rewritten: the marker bits of the lead byte are
     re-emitted and the top two bits of every trailing byte are kept as they
     were. The sum is truncated to the scalar width of the rune (7, 11, 16 or
     21 bits), so it wraps rather than changing the rune's length. */
  static int Shift(uint8_t* word, int word_len, uint16_t parameter) {
    const uint8_t lead = word[0];
    /* Limited sign extension: the low 15 bits are added to a bias of 1 << 24
       and the sign bit (worth 0x8000) is subtracted from it. The bias sits
       above every mask used below, so it never reaches the output. */
    uint32_t scalar =
        (parameter & 0x7FFFu) + (0x1000000u - (parameter & 0x8000u));

    if (lead < 0x80) {
      /* 1-byte rune / 0sssssss / 7 bit scalar (ASCII). */
      scalar += (uint32_t)lead;
      word[0] = (uint8_t)(scalar & 0x7Fu);
      return 1;
    }

    if (lead < 0xC0) {
      /* Continuation / 10AAAAAA. */
      return 1;
    }

    if (lead < 0xE0) {
      /* 2-byte rune / 110sssss AAssssss / 11 bit scalar. */
      if (word_len < 2) {
        return 1;
      }
      scalar += (uint32_t)(((lead & 0x1Fu) << 6u) | (word[1] & 0x3Fu));
      word[0] = (uint8_t)(0xC0 | ((scalar >> 6u) & 0x1F));
      word[1] = (uint8_t)((word[1] & 0xC0) | (scalar & 0x3F));
      return 2;
    }

    if (lead < 0xF0) {
      /* 3-byte rune / 1110ssss AAssssss BBssssss / 16 bit scalar. */
      if (word_len < 3) {
        return word_len;
      }
      scalar += (uint32_t)(((lead & 0x0Fu) << 12u) |
                           ((word[1] & 0x3Fu) << 6u) |
                           (word[2] & 0x3Fu));
      word[0] = (uint8_t)(0xE0 | ((scalar >> 12u) & 0x0F));
      word[1] = (uint8_t)((word[1] & 0xC0) | ((scalar >> 6u) & 0x3F));
      word[2] = (uint8_t)((word[2] & 0xC0) | (scalar & 0x3F));
      return 3;
    }

    if (lead >= 0xF8) {
      /* Not a lead byte this routine knows how to decode; skip it. */
      return 1;
    }

    /* 4-byte rune / 11110sss AAssssss BBssssss CCssssss / 21 bit scalar. */
    if (word_len < 4) {
      return word_len;
    }
    scalar += (uint32_t)(((lead & 0x07u) << 18u) |
                         ((word[1] & 0x3Fu) << 12u) |
                         ((word[2] & 0x3Fu) << 6u) |
                         (word[3] & 0x3Fu));
    word[0] = (uint8_t)(0xF0 | ((scalar >> 18u) & 0x07));
    word[1] = (uint8_t)((word[1] & 0xC0) | ((scalar >> 12u) & 0x3F));
    word[2] = (uint8_t)((word[2] & 0xC0) | ((scalar >> 6u) & 0x3F));
    word[3] = (uint8_t)((word[3] & 0xC0) | (scalar & 0x3F));
    return 4;
  }
  225. int BrotliTransformDictionaryWord(uint8_t* dst, const uint8_t* word, int len,
  226. const BrotliTransforms* transforms, int transform_idx) {
  227. int idx = 0;
  228. const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, transform_idx);
  229. uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, transform_idx);
  230. const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, transform_idx);
  231. {
  232. int prefix_len = *prefix++;
  233. while (prefix_len--) { dst[idx++] = *prefix++; }
  234. }
  235. {
  236. const int t = type;
  237. int i = 0;
  238. if (t <= BROTLI_TRANSFORM_OMIT_LAST_9) {
  239. len -= t;
  240. } else if (t >= BROTLI_TRANSFORM_OMIT_FIRST_1
  241. && t <= BROTLI_TRANSFORM_OMIT_FIRST_9) {
  242. int skip = t - (BROTLI_TRANSFORM_OMIT_FIRST_1 - 1);
  243. word += skip;
  244. len -= skip;
  245. }
  246. while (i < len) { dst[idx++] = word[i++]; }
  247. if (t == BROTLI_TRANSFORM_UPPERCASE_FIRST) {
  248. ToUpperCase(&dst[idx - len]);
  249. } else if (t == BROTLI_TRANSFORM_UPPERCASE_ALL) {
  250. uint8_t* uppercase = &dst[idx - len];
  251. while (len > 0) {
  252. int step = ToUpperCase(uppercase);
  253. uppercase += step;
  254. len -= step;
  255. }
  256. } else if (t == BROTLI_TRANSFORM_SHIFT_FIRST) {
  257. uint16_t param = (uint16_t)(transforms->params[transform_idx * 2]
  258. + (transforms->params[transform_idx * 2 + 1] << 8u));
  259. Shift(&dst[idx - len], len, param);
  260. } else if (t == BROTLI_TRANSFORM_SHIFT_ALL) {
  261. uint16_t param = (uint16_t)(transforms->params[transform_idx * 2]
  262. + (transforms->params[transform_idx * 2 + 1] << 8u));
  263. uint8_t* shift = &dst[idx - len];
  264. while (len > 0) {
  265. int step = Shift(shift, len, param);
  266. shift += step;
  267. len -= step;
  268. }
  269. }
  270. }
  271. {
  272. int suffix_len = *suffix++;
  273. while (suffix_len--) { dst[idx++] = *suffix++; }
  274. return idx;
  275. }
  276. }
  277. #if defined(__cplusplus) || defined(c_plusplus)
  278. } /* extern "C" */
  279. #endif