source: text.c @ d7bcff8

release-1.10, release-1.4, release-1.5, release-1.6, release-1.7, release-1.8, release-1.9
Last change on this file since d7bcff8 was 64c9165, checked in by Anders Kaseorg <andersk@mit.edu>, 15 years ago
stristr: Remove. This stristr implementation had problems with Unicode: g_utf8_casefold is allowed to change the length of the string. It seems that several projects get this function by copying the code of utf8_strcasestr out of gtksourceview. Fortunately, we no longer need it. Signed-off-by: Anders Kaseorg <andersk@mit.edu>
  • Property mode set to 100644
File size: 7.0 KB
RevLine 
[7d4fbcd]1#include <stdio.h>
2#include <string.h>
3#include <stdlib.h>
[995eb4b]4#include <ctype.h>
[7d4fbcd]5#include "owl.h"
6
/* Copy 'in' into 'out', prefixing every line with 'n' spaces.
 *
 * A "line" is a run of characters up to and including a '\n', or the
 * trailing run with no '\n' at the end of the string.  An empty 'in'
 * produces an empty 'out'.  A non-positive 'n' adds no padding.
 *
 * 'out' is written without bounds checking: the caller must supply room
 * for strlen(in) + n * (number of lines) + 1 bytes.  'out' and 'in' must
 * not overlap.
 */
void owl_text_indent(char *out, char *in, int n)
{
  char *dst = out;
  const char *line = in;
  size_t pad = (n > 0) ? (size_t)n : 0;

  while (*line != '\0') {
    const char *nl = strchr(line, '\n');
    /* length of this line, including its newline when it has one */
    size_t len = nl ? (size_t)(nl - line) + 1 : strlen(line);

    memset(dst, ' ', pad);
    dst += pad;
    memcpy(dst, line, len);
    dst += len;
    line += len;
  }

  *dst = '\0';
}
30
/* Return the number of lines in 'in'.
 *
 * Every '\n' terminates one line.  If the string is non-empty and does
 * not end in '\n', the unterminated tail counts as one more line.  The
 * empty string has zero lines.
 */
int owl_text_num_lines(char *in)
{
  const char *p;
  int count = 0;

  for (p = in; *p != '\0'; p++) {
    if (*p == '\n') {
      count++;
    }
  }

  /* p now points at the terminator; look at the character before it */
  if (p != in && p[-1] != '\n') {
    count++;
  }

  return count;
}
45
[3abf28b]46
/* Return a copy of 'in' with a small, fixed set of HTML markup removed.
 *
 *  - Tags beginning (case-insensitively) with <BODY, <FONT, <HTML,
 *    </BODY, </FONT or </HTML are dropped, up to and including the
 *    next '>'.
 *  - The exact tag <BR> (any case) is replaced by a newline.
 *  - Any other <...> tag is copied through unchanged.
 *  - A '<' with no following '>' ends the scan; the rest of the input
 *    is copied through unchanged.
 *  - Afterwards the entities &lt; &gt; &quot; &nbsp; &ensp; &emsp;
 *    &endash; &emdash; and &amp; are replaced by their text forms.
 *
 * The result is allocated with the owl allocator; caller must free the
 * return.
 */
char *owl_text_htmlstrip(char *in)
{
  /* "&amp;" is deliberately decoded last.  Decoding it earlier would
   * turn "&amp;quot;" into "&quot;" and then into '"', i.e. the text
   * would be unescaped twice.
   */
  static const struct {
    char *from;
    char *to;
  } entities[] = {
    { "&lt;",     "<"   },
    { "&gt;",     ">"   },
    { "&quot;",   "\""  },
    { "&nbsp;",   " "   },
    { "&ensp;",   "  "  },
    { "&emsp;",   "   " },
    { "&endash;", "--"  },
    { "&emdash;", "---" },
    { "&amp;",    "&"   },
  };
  const char *cur = in;
  char *out, *dst;
  size_t i;

  /* Tag stripping only ever removes or shortens text, so the input
   * length (plus terminator) is always enough room.
   */
  out = owl_malloc(strlen(in) + 1);
  dst = out;

  while (*cur != '\0') {
    const char *open, *close;
    size_t n;

    /* look for an open bracket; if none, copy the rest and stop */
    open = strchr(cur, '<');
    if (open == NULL) {
      n = strlen(cur);
      memcpy(dst, cur, n);
      dst += n;
      break;
    }

    /* copy everything before the open bracket */
    n = (size_t)(open - cur);
    memcpy(dst, cur, n);
    dst += n;

    /* find the close bracket; if none, copy the rest as-is and stop */
    close = strchr(open, '>');
    if (close == NULL) {
      n = strlen(open);
      memcpy(dst, open, n);
      dst += n;
      break;
    }

    if (!strncasecmp(open, "<BODY", 5) ||
        !strncasecmp(open, "<FONT", 5) ||
        !strncasecmp(open, "<HTML", 5) ||
        !strncasecmp(open, "</FONT", 6) ||
        !strncasecmp(open, "</HTML", 6) ||
        !strncasecmp(open, "</BODY", 6)) {
      /* known tag: drop it entirely */
    } else if (!strncasecmp(open, "<BR>", 4)) {
      *dst++ = '\n';
    } else {
      /* unknown tag: keep it verbatim, including both brackets */
      n = (size_t)(close - open) + 1;
      memcpy(dst, open, n);
      dst += n;
    }

    /* resume just past the close bracket */
    cur = close + 1;
  }
  *dst = '\0';

  /* each substitution returns a fresh string; release the previous one */
  for (i = 0; i < sizeof(entities) / sizeof(entities[0]); i++) {
    char *next = owl_text_substitute(out, entities[i].from, entities[i].to);
    owl_free(out);
    out = next;
  }

  return out;
}
[ab31454]126
#define OWL_TAB_WIDTH 8

/* Return a copy of 'in' in which every tab is replaced by enough spaces
 * to reach the next multiple of OWL_TAB_WIDTH columns.  The column
 * counter restarts at zero after each newline.
 *
 * Caller must free return.
 */
char *owl_text_expand_tabs(char *in)
{
  const char *src;
  char *result, *dst;
  int tab_count = 0;
  int column = 0;

  /* first pass: count tabs so the worst-case size is known up front */
  for (src = in; *src != '\0'; src++) {
    if (*src == '\t') {
      tab_count++;
    }
  }

  result = owl_malloc(strlen(in) + 1 + OWL_TAB_WIDTH * tab_count);
  dst = result;

  /* second pass: copy, expanding tabs as we go */
  for (src = in; *src != '\0'; src++) {
    if (*src == '\t') {
      /* always emits at least one space, then pads to the tab stop */
      do {
        *dst++ = ' ';
        column++;
      } while (column % OWL_TAB_WIDTH != 0);
    } else {
      *dst++ = *src;
      column = (*src == '\n') ? 0 : column + 1;
    }
  }

  *dst = '\0';
  return result;
}
165
/* Return a copy of 'in' wrapped at roughly 'col' columns.
 *
 * Wrapping never inserts or removes bytes: when a line has grown past
 * 'col' characters, the most recent space seen on that line is turned
 * into a newline.  A line with no usable space is left over-long.  The
 * final character of the string is never examined.
 *
 * caller must free the return
 */
char *owl_text_wordwrap(char *in, int col)
{
  char *out = owl_strdup(in);
  int len = strlen(in);
  int space_at = -1;    /* index of the last space seen, or -1 */
  int newline_at = -1;  /* index of the last newline, or -1 */
  int pos;

  for (pos = 0; pos < len - 1; pos++) {
    switch (out[pos]) {
    case ' ':
      space_at = pos;
      break;

    case '\n':
      newline_at = pos;
      break;

    default:
      /* do we need to wrap? */
      if ((pos - (newline_at + 1)) > col) {
        int no_space = (space_at == -1);
        int space_on_old_line = (newline_at > 0 && space_at <= newline_at);

        if (!no_space && !space_on_old_line) {
          /* turn the last space into a newline */
          out[space_at] = '\n';
          newline_at = space_at;
          space_at = -1;
        }
        /* otherwise there is nowhere to break; leave the line long */
      }
      break;
    }
  }

  return out;
}
[e3d9c77]211
/* Join wrapped lines in place: every newline that is not the first or
 * last character of the string, and that has no newline directly before
 * or after it, is replaced by a space.  Runs of two or more newlines
 * (blank lines) are left alone.
 *
 * this modifies 'in'
 */
void owl_text_wordunwrap(char *in)
{
  int len = strlen(in);
  int pos;

  /* the first and last characters are never candidates */
  for (pos = 1; pos < len - 1; pos++) {
    if (in[pos] != '\n') {
      continue;
    }
    if (in[pos - 1] == '\n' || in[pos + 1] == '\n') {
      continue;
    }
    in[pos] = ' ';
  }
}
226
/* return 1 if a string is only whitespace, otherwise 0
 *
 * If 's' is valid UTF-8 it is examined one code point at a time with
 * g_unichar_isspace(); otherwise it is examined byte by byte with
 * isspace().  The empty string counts as only whitespace.
 */
int only_whitespace(char *s)
{
  const char *p;

  if (g_utf8_validate(s, -1, NULL)) {
    for (p = s; *p != '\0'; p = g_utf8_next_char(p)) {
      if (!g_unichar_isspace(g_utf8_get_char(p))) {
        return 0;
      }
    }
    return 1;
  }

  /* Not valid UTF-8, so bytes with the high bit set are expected here.
   * isspace() is only defined for EOF and unsigned char values, so go
   * through unsigned char rather than passing a possibly negative char.
   */
  for (p = s; *p != '\0'; p++) {
    if (!isspace((unsigned char)*p)) {
      return 0;
    }
  }
  return 1;
}
244
/* Pick a quote string suitable for wrapping 'line'.
 *
 * Returns a pointer to a string literal (do not free or modify):
 *   - "'"  for the empty string,
 *   - "\"" if 'line' contains a single quote,
 *   - "'"  if 'line' contains a double quote or a space,
 *   - ""   otherwise (no quoting needed).
 * The single-quote check takes priority when both kinds are present.
 */
char *owl_getquoting(char *line)
{
  char *quote;

  if (*line == '\0') {
    quote = "'";
  } else if (strchr(line, '\'') != NULL) {
    quote = "\"";
  } else if (strpbrk(line, "\" ") != NULL) {
    quote = "'";
  } else {
    quote = "";
  }

  return quote;
}
253
/* Return a copy of 'in' with every occurrence of 'from' replaced by
 * 'to'.  Matching is left to right and non-overlapping; replaced text
 * is not rescanned.  An empty 'from' yields a plain copy of 'in'.
 * Backslash quoting is not handled.
 * Caller must free returned string.
 */
char *owl_text_substitute(char *in, char *from, char *to)
{
  const char *src;
  char *result;
  int capacity, from_len, to_len;
  int written = 0;

  if (from[0] == '\0') {
    return owl_strdup(in);
  }

  from_len = strlen(from);
  to_len = strlen(to);
  capacity = strlen(in) + 1;
  result = owl_malloc(capacity);

  src = in;
  while (*src != '\0') {
    if (strncmp(src, from, from_len) != 0) {
      /* no match here: copy one character through */
      result[written++] = *src++;
      continue;
    }

    /* match: grow by the replacement length, then splice it in */
    capacity += to_len;
    result = owl_realloc(result, capacity);
    memcpy(result + written, to, to_len);
    written += to_len;
    src += from_len;
  }

  result[written] = '\0';
  return result;
}
286
/* Replace, in place, every occurrence of the character 'a' in 'buff'
 * with the character 'b'.  'buff' must be null terminated.  The buffer
 * is passed to owl_function_debugmsg() before and after the change.
 */
void owl_text_tr(char *buff, char a, char b)
{
  char *p;

  owl_function_debugmsg("In: %s", buff);

  for (p = buff; *p != '\0'; p++) {
    if (*p == a) {
      *p = b;
    }
  }

  owl_function_debugmsg("Out: %s", buff);
}
300
/* Return a string which is like 'in' except that every instance of
 * any character in 'toquote' found in 'in' is preceded by the string
 * 'quotestr'.  For example, owl_text_quote(in, "+*.", "\\") would
 * place a backslash before every '+', '*' or '.' in 'in'.  It is
 * permissible for a character in 'quotestr' to be in 'toquote'.
 *
 * The result is a newly allocated string obtained from owl_malloc().
 * This function has no error path of its own.
 * NOTE(review): presumably the caller frees the result, as with the
 * other allocating helpers in this file, and owl_malloc() never
 * returns NULL -- confirm against its definition.
 */
char *owl_text_quote(char *in, char *toquote, char *quotestr)
{
  size_t in_len = strlen(in);
  size_t quotestr_len = strlen(quotestr);
  size_t escapes = 0;
  size_t i;
  char *out, *dst;

  /* count the characters that need quoting so the buffer can be
   * sized exactly, with a single allocation
   */
  for (i = 0; i < in_len; i++) {
    if (strchr(toquote, in[i]) != NULL) {
      escapes++;
    }
  }

  out = owl_malloc(in_len + quotestr_len * escapes + 1);
  dst = out;

  for (i = 0; i < in_len; i++) {
    /* check if it's a character that needs quoting */
    if (strchr(toquote, in[i]) != NULL) {
      memcpy(dst, quotestr, quotestr_len);
      dst += quotestr_len;
    }

    /* either way, we now copy over the character */
    *dst++ = in[i];
  }

  *dst = '\0';
  return out;
}
Note: See TracBrowser for help on using the repository browser.