source: text.c @ 4ebbfbc

release-1.10, release-1.9
Last change on this file since 4ebbfbc was 14be3a5, checked in by Jason Gross <jgross@mit.edu>, 13 years ago
Give owl_text_indent an indent_first_line parameter. This is useful if you need to indent text that isn't broken into chunks that end with newlines. This is primarily in preparation for the next commit, where we need to replace the indent on the first line by a prefix. The other (reasonable) option is to make owl_text_indent always prefix the string it's given with an indent, even when it's given the empty string. This would break the nice property that indent(A + B) = indent(A) + indent(B) whenever A ended with a newline. After some discussion on zephyr and on the github pull request, I decided to go with this option.
  • Property mode set to 100644
File size: 6.8 KB
Line 
1#include <stdio.h>
2#include <string.h>
3#include <stdlib.h>
4#include <ctype.h>
5#include "owl.h"
6
7/* Returns a copy of 'in' with each line indented 'n'
8 * characters. Result must be freed with g_free. */
9CALLER_OWN char *owl_text_indent(const char *in, int n, bool indent_first_line)
10{
11  const char *ptr1, *ptr2, *last;
12  GString *out = g_string_new("");
13  int i;
14  bool indent_this_line = indent_first_line;
15
16  last=in+strlen(in)-1;
17  ptr1=in;
18  while (ptr1<=last) {
19    if (indent_this_line) {
20      for (i = 0; i < n; i++) {
21        g_string_append_c(out, ' ');
22      }
23    }
24    indent_this_line = true;
25    ptr2=strchr(ptr1, '\n');
26    if (!ptr2) {
27      g_string_append(out, ptr1);
28      break;
29    } else {
30      g_string_append_len(out, ptr1, ptr2-ptr1+1);
31    }
32    ptr1=ptr2+1;
33  }
34  return g_string_free(out, false);
35}
36
/* Counts the lines in 'in': one for every '\n', plus one more if the
 * text is non-empty and does not end with a newline.  The empty string
 * has zero lines. */
int owl_text_num_lines(const char *in)
{
  const char *p;
  int count = 0;

  for (p = in; *p != '\0'; p++) {
    if (*p == '\n') {
      count++;
    }
  }

  /* p now sits on the terminator; an unterminated final line still counts */
  if (p != in && p[-1] != '\n') {
    count++;
  }

  return count;
}
51
52
53/* caller must free the return */
54CALLER_OWN char *owl_text_htmlstrip(const char *in)
55{
56  const char *ptr1, *end, *ptr2, *ptr3;
57  char *out, *out2;
58
59  out=g_new(char, strlen(in)+30);
60  strcpy(out, "");
61
62  ptr1=in;
63  end=in+strlen(in);
64 
65  while(ptr1<end) {
66    /* look for an open bracket */
67    ptr2=strchr(ptr1, '<');
68
69    /* if none, copy in from here to end and exit */
70    if (ptr2==NULL) {
71      strcat(out, ptr1);
72      break;
73    }
74
75    /* otherwise copy in everything before the open bracket */
76    if (ptr2>ptr1) {
77      strncat(out, ptr1, ptr2-ptr1);
78    }
79
80    /* find the close bracket */
81    ptr3=strchr(ptr2, '>');
82   
83    /* if there is no close, copy as you are and exit */
84    if (!ptr3) {
85      strcat(out, ptr2);
86      break;
87    }
88
89    /* look for things we know */
90    if (!strncasecmp(ptr2, "<BODY", 5) ||
91        !strncasecmp(ptr2, "<FONT", 5) ||
92        !strncasecmp(ptr2, "<HTML", 5) ||
93        !strncasecmp(ptr2, "</FONT", 6) ||
94        !strncasecmp(ptr2, "</HTML", 6) ||
95        !strncasecmp(ptr2, "</BODY", 6)) {
96
97      /* advance to beyond the angle brakcet and go again */
98      ptr1=ptr3+1;
99      continue;
100    }
101    if (!strncasecmp(ptr2, "<BR>", 4)) {
102      strcat(out, "\n");
103      ptr1=ptr3+1;
104      continue;
105    }
106
107    /* if it wasn't something we know, copy to the > and  go again */
108    strncat(out, ptr2, ptr3-ptr2+1);
109    ptr1=ptr3+1;
110  }
111
112  out2=owl_text_substitute(out, "&lt;", "<");
113  g_free(out);
114  out=owl_text_substitute(out2, "&gt;", ">");
115  g_free(out2);
116  out2=owl_text_substitute(out, "&amp;", "&");
117  g_free(out);
118  out=owl_text_substitute(out2, "&quot;", "\"");
119  g_free(out2);
120  out2=owl_text_substitute(out, "&nbsp;", " ");
121  g_free(out);
122  out=owl_text_substitute(out2, "&ensp;", "  ");
123  g_free(out2);
124  out2=owl_text_substitute(out, "&emsp;", "   ");
125  g_free(out);
126  out=owl_text_substitute(out2, "&endash;", "--");
127  g_free(out2);
128  out2=owl_text_substitute(out, "&emdash;", "---");
129  g_free(out);
130
131  return(out2);
132}
133
134/* Caller must free return */
135CALLER_OWN char *owl_text_expand_tabs(const char *in)
136{
137  int len = 0;
138  const char *p = in;
139  char *ret, *out;
140  int col;
141
142  col = 0;
143  while(*p) {
144    gunichar c = g_utf8_get_char(p);
145    const char *q = g_utf8_next_char(p);
146    switch (c) {
147    case '\t':
148      do { len++; col++; } while (col % OWL_TAB_WIDTH);
149      p = q;
150      continue;
151    case '\n':
152      col = 0;
153      break;
154    default:
155      col += mk_wcwidth(c);
156      break;
157    }
158    len += q - p;
159    p = q;
160  }
161
162  ret = g_new(char, len + 1);
163
164  p = in;
165  out = ret;
166
167  col = 0;
168  while(*p) {
169    gunichar c = g_utf8_get_char(p);
170    const char *q = g_utf8_next_char(p);
171    switch (c) {
172    case '\t':
173      do {*(out++) = ' '; col++; } while (col % OWL_TAB_WIDTH);
174      p = q;
175      continue;
176    case '\n':
177      col = 0;
178      break;
179    default:
180      col += mk_wcwidth(c);
181      break;
182    }
183    memcpy(out, p, q - p);
184    out += q - p;
185    p = q;
186  }
187
188  *out = 0;
189
190  return ret;
191}
192
193/* caller must free the return */
194CALLER_OWN char *owl_text_wordwrap(const char *in, int col)
195{
196  char *out;
197  int cur, lastspace, len, lastnewline;
198
199  out=g_strdup(in);
200  len=strlen(in);
201  cur=0;
202  lastspace=-1;
203  lastnewline=-1;
204
205  while (cur<(len-1)) {
206    if (out[cur]==' ') {
207      lastspace=cur;
208      cur++;
209      continue;
210    } else if (out[cur]=='\n') {
211      lastnewline=cur;
212      cur++;
213      continue;
214    }
215
216    /* do we need to wrap? */
217    if ( (cur-(lastnewline+1)) > col ) {
218      if (lastspace==-1 ||
219          (lastnewline>0 && (lastspace<=lastnewline))) {
220        /* we can't help, sorry */
221        cur++;
222        continue;
223      }
224
225      /* turn the last space into a newline */
226      out[lastspace]='\n';
227      lastnewline=lastspace;
228      lastspace=-1;
229      cur++;
230      continue;
231    }
232
233    cur++;
234    continue;
235  }
236  return(out);
237}
238
/* Joins wrapped lines in place: every newline that has a non-newline
 * character on both sides becomes a space.  Newlines at the very start
 * or end of the text, and newlines adjacent to another newline, are
 * left alone.  This modifies 'in'. */
void owl_text_wordunwrap(char *in)
{
  int len = strlen(in);
  int pos;

  /* the first and last characters can never qualify, so skip them */
  for (pos = 1; pos < len - 1; pos++) {
    if (in[pos] != '\n') {
      continue;
    }
    if (in[pos - 1] == '\n' || in[pos + 1] == '\n') {
      continue;
    }
    in[pos] = ' ';
  }
}
253
/* Returns 1 if 's' contains nothing but whitespace (the empty string
 * included), otherwise 0.
 *
 * When 's' is valid UTF-8 it is examined one code point at a time with
 * g_unichar_isspace(); otherwise it is examined byte by byte with
 * isspace(). */
int only_whitespace(const char *s)
{
  if (g_utf8_validate(s, -1, NULL)) {
    const char *p;
    for (p = s; p[0]; p = g_utf8_next_char(p)) {
      if (!g_unichar_isspace(g_utf8_get_char(p))) return 0;
    }
  } else {
    const unsigned char *b;
    /* A string that fails UTF-8 validation contains bytes >= 0x80, which
     * are negative where plain char is signed.  isspace() is only defined
     * for values representable as unsigned char (or EOF), so walk the
     * bytes as unsigned char. */
    for (b = (const unsigned char *)s; *b; b++) {
      if (!isspace(*b)) return 0;
    }
  }
  return 1;
}
271
272/* Return a string with any occurances of 'from' replaced with 'to'.
273 * Caller must free returned string.
274 */
275CALLER_OWN char *owl_text_substitute(const char *in, const char *from, const char *to)
276{
277  char **split = g_strsplit(in, from, 0), *out;
278  out = g_strjoinv(to, split);
279  g_strfreev(split);
280  return out;
281}
282
283/* Return a string which is like 'in' except that every instance of
284 * any character in 'toquote' found in 'in' is preceeded by the string
285 * 'quotestr'.  For example, owl_text_quote(in, "+*.", "\") would
286 * place a backslash before every '+', '*' or '.' in 'in'.  It is
287 * permissable for a character in 'quotestr' to be in 'toquote'.
288 * On success returns the string, on error returns NULL.
289 */
290CALLER_OWN char *owl_text_quote(const char *in, const char *toquote, const char *quotestr)
291{
292  int i, x, r, place, escape;
293  int in_len, toquote_len, quotestr_len;
294  char *out;
295
296  in_len=strlen(in);
297  toquote_len=strlen(toquote);
298  quotestr_len=strlen(quotestr);
299  place=0;
300  escape = 0;
301  for (i=0; i<in_len; i++) {
302    if(strchr(toquote, in[i]) != NULL)
303      escape++;
304  }
305  out = g_new(char, in_len + quotestr_len*escape+1);
306  for (i=0; i<in_len; i++) {
307
308    /* check if it's a character that needs quoting */
309    for (x=0; x<toquote_len; x++) {
310      if (in[i]==toquote[x]) {
311        /* quote it */
312        for (r=0; r<quotestr_len; r++) {
313          out[place+r]=quotestr[r];
314        }
315        place+=quotestr_len;
316        break;
317      }
318    }
319
320    /* either way, we now copy over the character */
321    out[place]=in[i];
322    place++;
323  }
324  out[place]='\0';
325  return(out);
326}
Note: See TracBrowser for help on using the repository browser.