cord.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377
  1. /*
  2. * Copyright (c) 1993-1994 by Xerox Corporation. All rights reserved.
  3. *
  4. * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
  5. * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
  6. *
  7. * Permission is hereby granted to use or copy this program
  8. * for any purpose, provided the above notices are retained on all copies.
  9. * Permission to modify the code and to distribute modified code is granted,
  10. * provided the above notices are retained, and a notice that the code was
  11. * modified is included with the above copyright notice.
  12. */
  13. /*
  14. * Cords are immutable character strings. A number of operations
  15. * on long cords are much more efficient than their strings.h counterparts.
  16. * In particular, concatenation takes constant time independent of the length
  17. * of the arguments. (Cords are represented as trees, with internal
  18. * nodes representing concatenation and leaves consisting of either C
  19. * strings or a functional description of the string.)
  20. *
  21. * The following are reasonable applications of cords. They would perform
  22. * unacceptably if C strings were used:
  23. * - A compiler that produces assembly language output by repeatedly
  24. * concatenating instructions onto a cord representing the output file.
  25. * - A text editor that converts the input file to a cord, and then
  26. * performs editing operations by producing a new cord representing
  27. * the file after each character change (and keeping the old ones in an
  28. * edit history)
  29. *
  30. * For optimal performance, cords should be built by
  31. * concatenating short sections.
  32. * This interface is designed for maximum compatibility with C strings.
  33. * ASCII NUL characters may be embedded in cords using CORD_from_fn.
  34. * This is handled correctly, but CORD_to_char_star will produce a string
  35. * with embedded NULs when given such a cord.
  36. *
  37. * This interface is fairly big, largely for performance reasons.
  38. * The most basic constants and functions:
  39. *
  40. * CORD - the type of a cord;
  41. * CORD_EMPTY - empty cord;
  42. * CORD_len(cord) - length of a cord;
  43. * CORD_cat(cord1,cord2) - concatenation of two cords;
  44. * CORD_substr(cord, start, len) - substring (or subcord);
  45. * CORD_pos i; CORD_FOR(i, cord) { ... CORD_pos_fetch(i) ... } -
  46. * examine each character in a cord. CORD_pos_fetch(i) is the char.
  47. * CORD_fetch(cord, i) - Retrieve i'th character (slowly).
  48. * CORD_cmp(cord1, cord2) - compare two cords.
  49. * CORD_from_file(FILE * f) - turn a read-only file into a cord.
  50. * CORD_to_char_star(cord) - convert to C string.
  51. * (Non-NULL C constant strings are cords.)
  52. * CORD_printf (etc.) - cord version of printf. Use %r for cords.
  53. */
  54. #ifndef CORD_H
  55. #define CORD_H
  56. #include <stddef.h>
  57. #include <stdio.h>
  58. #ifdef __cplusplus
  59. extern "C" {
  60. #endif
  61. #if defined(GC_DLL) && !defined(CORD_NOT_DLL)
  62. /* CORD_API is the linkage prefix of the public declarations below; it is selected the same way as GC_API in gc_config_macros.h. */
  63. # ifdef CORD_BUILD /* CORD_BUILD set: the symbols are exported */
  64. # if defined(__MINGW32__) || defined(__CEGCC__)
  65. # define CORD_API __declspec(dllexport)
  66. # elif defined(_MSC_VER) || defined(__DMC__) || defined(__BORLANDC__) \
  67. || defined(__CYGWIN__) || defined(__WATCOMC__)
  68. # define CORD_API extern __declspec(dllexport)
  69. # elif defined(__GNUC__) && !defined(GC_NO_VISIBILITY) \
  70. && (__GNUC__ >= 4 || defined(GC_VISIBILITY_HIDDEN_SET))
  71. /* Only matters if used in conjunction with the -fvisibility=hidden option. */
  72. # define CORD_API extern __attribute__((__visibility__("default")))
  73. # endif
  74. # else /* CORD_BUILD not set: the symbols are imported */
  75. # if defined(__MINGW32__) || defined(__CEGCC__) || defined(_MSC_VER) \
  76. || defined(__DMC__) || defined(__BORLANDC__) || defined(__CYGWIN__)
  77. # define CORD_API __declspec(dllimport)
  78. # elif defined(__WATCOMC__)
  79. # define CORD_API extern __declspec(dllimport)
  80. # endif
  81. # endif /* !CORD_BUILD */
  82. #endif /* GC_DLL && !CORD_NOT_DLL */
  83. #ifndef CORD_API /* not a DLL build, or no compiler-specific branch matched above */
  84. # define CORD_API extern
  85. #endif
  86. /* Cords have type const char *. This is cheating quite a bit, and not */
  87. /* 100% portable. But it means that nonempty character string */
  88. /* constants may be used as cords directly, provided the string is */
  89. /* never modified in place. The empty cord is represented by, and */
  90. /* can be written as, 0. */
  91. typedef const char * CORD;
  92. /* An empty cord is always represented as nil (a null pointer). */
  93. #define CORD_EMPTY 0
  94. /* Is a nonempty cord represented as a C string? True when the first byte of s is not '\0'. The macro dereferences s, so s must not be CORD_EMPTY. */
  95. #define CORD_IS_STRING(s) (*(s) != '\0')
  96. /* Concatenate two cords. If the arguments are C strings, they must */
  97. /* not be altered afterwards. */
  98. CORD_API CORD CORD_cat(CORD x, CORD y);
  99. /* Concatenate a cord and a C string of known length leny. Except for */
  100. /* the empty string case, this is a special case of CORD_cat. Since the */
  101. /* length is known, it can be faster. */
  102. /* The string y is shared with the resulting CORD. Hence it must */
  103. /* not be altered by the caller. */
  104. CORD_API CORD CORD_cat_char_star(CORD x, const char * y, size_t leny);
  105. /* Compute the length of a cord. */
  106. CORD_API size_t CORD_len(CORD x);
  107. /* Cords may be represented by functions defining the i-th character. (client_data is presumably the pointer given to CORD_from_fn - confirm in the implementation.) */
  108. typedef char (* CORD_fn)(size_t i, void * client_data);
  109. /* Turn a functional description into a cord. (len is presumably the length of the resulting cord - confirm in the implementation.) */
  110. CORD_API CORD CORD_from_fn(CORD_fn fn, void * client_data, size_t len);
  111. /* Return the substring (subcord really) of x with length at most n, */
  112. /* starting at position i. (The initial character has position 0.) */
  113. CORD_API CORD CORD_substr(CORD x, size_t i, size_t n);
  114. /* Return the argument, but rebalanced to allow more efficient */
  115. /* character retrieval, substring operations, and comparisons. */
  116. /* This is useful only for cords that were built using repeated */
  117. /* concatenation. Guarantees log time access to the result, unless */
  118. /* x was obtained through a large number of repeated substring ops */
  119. /* or the embedded functional descriptions take longer to evaluate. */
  120. /* May reallocate significant parts of the cord. The argument is not */
  121. /* modified; only the result is balanced. */
  122. CORD_API CORD CORD_balance(CORD x);
  123. /* The following traverse a cord by applying a function to each */
  124. /* character. This is occasionally appropriate, especially where */
  125. /* speed is crucial. But, since C doesn't have nested functions, */
  126. /* clients of this sort of traversal are clumsy to write. Consider */
  127. /* the functions that operate on cord positions instead. */
  128. /* Function to iteratively apply to individual characters in a cord. */
  129. typedef int (* CORD_iter_fn)(char c, void * client_data);
  130. /* Function to apply to substrings of a cord. Each substring is */
  131. /* a C character string, not a general cord. */
  132. typedef int (* CORD_batched_iter_fn)(const char * s, void * client_data);
  133. #define CORD_NO_FN ((CORD_batched_iter_fn)0) /* null callback: pass as f2 when no batched function is supplied */
  134. /* Apply f1 to each character in the cord, in ascending order, */
  135. /* starting at position i. If */
  136. /* f2 is not CORD_NO_FN, then multiple calls to f1 may be replaced by */
  137. /* a single call to f2. The parameter f2 is provided only to allow */
  138. /* some optimization by the client. This terminates when the right */
  139. /* end of the cord is reached, or when f1 or f2 returns != 0. In the */
  140. /* latter case CORD_iter5 returns != 0. Otherwise it returns 0. */
  141. /* The specified value of i must be < CORD_len(x). */
  142. CORD_API int CORD_iter5(CORD x, size_t i, CORD_iter_fn f1,
  143. CORD_batched_iter_fn f2, void * client_data);
  144. /* A simpler version that starts at 0, and without f2. The macro defined after the prototype expands calls written as CORD_iter(...) into CORD_iter5 calls: */
  145. CORD_API int CORD_iter(CORD x, CORD_iter_fn f1, void * client_data);
  146. #define CORD_iter(x, f1, cd) CORD_iter5(x, 0, f1, CORD_NO_FN, cd)
  147. /* Similar to CORD_iter5, but end-to-beginning. No provisions for */
  148. /* CORD_batched_iter_fn. */
  149. CORD_API int CORD_riter4(CORD x, size_t i, CORD_iter_fn f1, void * client_data);
  150. /* A simpler version that starts at the end: */
  151. CORD_API int CORD_riter(CORD x, CORD_iter_fn f1, void * client_data);
  152. #ifdef __cplusplus
  153. } /* extern "C" */
  154. #endif
  155. /* Functions that operate on cord positions. The easy way to traverse */
  156. /* cords. A cord position is logically a pair consisting of a cord */
  157. /* and an index into that cord. But it is much faster to retrieve a */
  158. /* character based on a position than on an index. Unfortunately, */
  159. /* positions are big (order of a few 100 bytes), so allocate them with */
  160. /* caution. */
  161. /* Things in cord_pos.h should be treated as opaque, except as */
  162. /* described below. Also note that */
  163. /* CORD_pos_fetch, CORD_next and CORD_prev have both macro and function */
  164. /* definitions. The former may evaluate their argument more than once. */
  165. #include "cord_pos.h"
  166. #ifdef __cplusplus
  167. extern "C" {
  168. #endif
  169. /*
  170. Visible definitions from cord_pos.h (included above):
  171. typedef <OPAQUE but fairly big> CORD_pos[1];
  172. * Extract the cord from a position:
  173. CORD CORD_pos_to_cord(CORD_pos p);
  174. * Extract the current index from a position:
  175. size_t CORD_pos_to_index(CORD_pos p);
  176. * Fetch the character located at the given position:
  177. char CORD_pos_fetch(CORD_pos p);
  178. * Initialize the position to refer to the given cord and index.
  179. * Note that this is the most expensive function on positions:
  180. void CORD_set_pos(CORD_pos p, CORD x, size_t i);
  181. * Advance the position to the next character.
  182. * p must be initialized and valid.
  183. * Invalidates p if past end:
  184. void CORD_next(CORD_pos p);
  185. * Move the position to the preceding character.
  186. * p must be initialized and valid.
  187. * Invalidates p if past beginning:
  188. void CORD_prev(CORD_pos p);
  189. * Is the position valid, i.e. inside the cord?
  190. int CORD_pos_valid(CORD_pos p);
  191. */ /* CORD_FOR(pos, cord) heads a loop over every character of cord in ascending order: pos is set to index 0 and advanced with CORD_next until CORD_pos_valid(pos) fails. Use CORD_pos_fetch(pos) in the loop body. */
  192. #define CORD_FOR(pos, cord) \
  193. for (CORD_set_pos(pos, cord, 0); CORD_pos_valid(pos); CORD_next(pos))
  194. /* An out-of-memory handler to call; a function pointer variable that may be supplied (assigned) by the client. */
  195. /* The handler must not return. NOTE(review): declared with plain extern rather than CORD_API, unlike the function declarations in this header - confirm this is intended for DLL builds. */
  196. extern void (* CORD_oom_fn)(void);
  197. /* Dump the representation of x to stdout in an implementation-defined */
  198. /* manner. Intended for debugging only. */
  199. CORD_API void CORD_dump(CORD x);
  200. /* The following could easily be implemented by the client. They are */
  201. /* provided in cordxtra.c for convenience. */
  202. /* Concatenate a character to the end of a cord. */
  203. CORD_API CORD CORD_cat_char(CORD x, char c);
  204. /* Concatenate n cords. Each of the n variadic arguments must have type CORD. */
  205. CORD_API CORD CORD_catn(int n, /* CORD */ ...);
  206. /* Return the character in CORD_substr(x, i, 1). */
  207. CORD_API char CORD_fetch(CORD x, size_t i);
  208. /* Return < 0, 0, or > 0, depending on whether x < y, x == y, or x > y. */
  209. CORD_API int CORD_cmp(CORD x, CORD y);
  210. /* A generalization that takes both starting positions for the */
  211. /* comparison, and a limit on the number of characters to be compared. */
  212. CORD_API int CORD_ncmp(CORD x, size_t x_start, CORD y, size_t y_start,
  213. size_t len);
  214. /* Find the first occurrence of s in x at position start or later. */
  215. /* Return the position of the first character of s in x, or */
  216. /* CORD_NOT_FOUND if there is none. */
  217. CORD_API size_t CORD_str(CORD x, size_t start, CORD s);
  218. /* Return a cord consisting of i copies of (possibly NUL) c. Dangerous */
  219. /* in conjunction with CORD_to_char_star. */
  220. /* The resulting representation takes constant space, independent of i. */
  221. CORD_API CORD CORD_chars(char c, size_t i);
  222. #define CORD_nul(i) CORD_chars('\0', (i)) /* shorthand: a cord of i NUL characters */
  223. /* Turn a file into a cord. The file must be seekable. Its contents */
  224. /* must remain constant. The file may be accessed as an immediate */
  225. /* result of this call and/or as a result of subsequent accesses to */
  226. /* the cord. Short files are likely to be immediately read, but */
  227. /* long files are likely to be read on demand, possibly relying on */
  228. /* stdio for buffering. */
  229. /* We must have exclusive access to the descriptor f, i.e. we may */
  230. /* read it at any time, and expect the file pointer to be */
  231. /* where we left it. Normally this should be invoked as */
  232. /* CORD_from_file(fopen(...)) */
  233. /* CORD_from_file arranges to close the file descriptor when it is no */
  234. /* longer needed (e.g. when the result becomes inaccessible). */
  235. /* The file f must be such that ftell reflects the actual character */
  236. /* position in the file, i.e. the number of characters that can be */
  237. /* or were read with fread. On UNIX systems this is always true. On */
  238. /* MS Windows systems, f must be opened in binary mode. */
  239. CORD_API CORD CORD_from_file(FILE * f);
  240. /* Equivalent to CORD_from_file, except that the entire file will be read */
  241. /* and the file pointer will be closed immediately. */
  242. /* The binary mode restriction of CORD_from_file does not apply. */
  243. CORD_API CORD CORD_from_file_eager(FILE * f);
  244. /* Equivalent to CORD_from_file, except that the file will be read on demand. */
  245. /* The binary mode restriction applies. */
  246. CORD_API CORD CORD_from_file_lazy(FILE * f);
  247. /* Turn a cord into a C string. The result shares no structure with */
  248. /* x, and is thus modifiable. */
  249. CORD_API char * CORD_to_char_star(CORD x);
  250. /* Turn a C string into a CORD. The C string is copied, and so may */
  251. /* subsequently be modified. */
  252. CORD_API CORD CORD_from_char_star(const char *s);
  253. /* Identical to CORD_to_char_star, but the result may share structure */
  254. /* with the argument and is thus not modifiable. */
  255. CORD_API const char * CORD_to_const_char_star(CORD x);
  256. /* Write a cord to a file, starting at the current position. No */
  257. /* trailing NULs or newlines are added. */
  258. /* Returns EOF if a write error occurs, 1 otherwise. */
  259. CORD_API int CORD_put(CORD x, FILE * f);
  260. /* "Not found" result for CORD_str and for the following two functions. */
  261. #define CORD_NOT_FOUND ((size_t)(-1))
  262. /* A vague analog of strchr. Returns the position (an integer, not */
  263. /* a pointer) of the first occurrence of (char) c inside x at position */
  264. /* i or later. The value i must be < CORD_len(x). */
  265. CORD_API size_t CORD_chr(CORD x, size_t i, int c);
  266. /* A vague analog of strrchr. Returns the index of the last occurrence */
  267. /* of (char) c inside x at position i or earlier. The value i */
  268. /* must be < CORD_len(x). */
  269. CORD_API size_t CORD_rchr(CORD x, size_t i, int c);
  270. #ifdef __cplusplus
  271. } /* extern "C" */
  272. #endif
  273. /* The following are also not primitive, but are implemented in */
  274. /* cordprnt.c. They provide functionality similar to the ANSI C */
  275. /* functions with corresponding names, but with the following */
  276. /* additions and changes: */
  277. /* 1. A %r conversion specification specifies a CORD argument. Field */
  278. /* width, precision, etc. have the same semantics as for %s. */
  279. /* (Note that %c, %C, and %S were already taken.) */
  280. /* 2. The format string is represented as a CORD. */
  281. /* 3. CORD_sprintf and CORD_vsprintf assign the result through the 1st */
  282. /* argument. Unlike their ANSI C versions, there is no need to guess */
  283. /* the correct buffer size. */
  284. /* 4. Most of the conversions are implemented through the native */
  285. /* vsprintf. Hence they are usually no faster, and */
  286. /* idiosyncrasies of the native printf are preserved. However, */
  287. /* CORD arguments to CORD_sprintf and CORD_vsprintf are NOT copied; */
  288. /* the result shares the original structure. This may make them */
  289. /* very efficient in some unusual applications. */
  290. /* The format string is copied. */
  291. /* All functions return the number of characters generated or -1 on */
  292. /* error. This complies with the ANSI standard, but is inconsistent */
  293. /* with some older implementations of sprintf. */
  294. /* The implementation of these is probably less portable than the rest */
  295. /* of this package. */
  296. #ifndef CORD_NO_IO /* defining CORD_NO_IO omits the printf-style declarations below */
  297. #include <stdarg.h> /* for va_list */
  298. # ifdef __cplusplus
  299. extern "C" {
  300. # endif
  301. CORD_API int CORD_sprintf(CORD * out, CORD format, ...);
  302. CORD_API int CORD_vsprintf(CORD * out, CORD format, va_list args);
  303. CORD_API int CORD_fprintf(FILE * f, CORD format, ...);
  304. CORD_API int CORD_vfprintf(FILE * f, CORD format, va_list args);
  305. CORD_API int CORD_printf(CORD format, ...);
  306. CORD_API int CORD_vprintf(CORD format, va_list args);
  307. # ifdef __cplusplus
  308. } /* extern "C" */
  309. # endif
  310. #endif /* CORD_NO_IO */
  311. #endif /* CORD_H */