source: text.c @ f60b417

Last change on this file since f60b417 was 4d4f0ba, checked in by Jason Gross <jgross@mit.edu>, 13 years ago
Give owl_text_indent an indent_first_line parameter This is useful if you need to indent text that isn't broken into chunks that end with newlines.
  • Property mode set to 100644
File size: 6.8 KB
RevLine 
[7d4fbcd]1#include <stdio.h>
2#include <string.h>
3#include <stdlib.h>
[995eb4b]4#include <ctype.h>
[7d4fbcd]5#include "owl.h"
6
[4d24650]7/* Returns a copy of 'in' with each line indented 'n'
[ddbbcffa]8 * characters. Result must be freed with g_free. */
[4d4f0ba]9CALLER_OWN char *owl_text_indent(const char *in, int n, bool indent_first_line)
[ab31454]10{
[e19eb97]11  const char *ptr1, *ptr2, *last;
[4d24650]12  GString *out = g_string_new("");
[7d4fbcd]13  int i;
[4d4f0ba]14  bool indent_this_line = indent_first_line;
[7d4fbcd]15
16  last=in+strlen(in)-1;
17  ptr1=in;
18  while (ptr1<=last) {
[4d4f0ba]19    if (indent_this_line) {
20      for (i = 0; i < n; i++) {
21        g_string_append_c(out, ' ');
22      }
[7d4fbcd]23    }
[4d4f0ba]24    indent_this_line = true;
[7d4fbcd]25    ptr2=strchr(ptr1, '\n');
26    if (!ptr2) {
[4d24650]27      g_string_append(out, ptr1);
[7d4fbcd]28      break;
29    } else {
[4d24650]30      g_string_append_len(out, ptr1, ptr2-ptr1+1);
[7d4fbcd]31    }
32    ptr1=ptr2+1;
33  }
[4d24650]34  return g_string_free(out, false);
[7d4fbcd]35}
36
/* Counts the lines in 'in'.  Each '\n' terminates one line, and a
 * non-empty final segment without a trailing '\n' counts as one more.
 * An empty string has zero lines. */
int owl_text_num_lines(const char *in)
{
  const char *p;
  int count = 0;

  for (p = in; *p != '\0'; p++) {
    if (*p == '\n') {
      count++;
    }
  }

  /* p now sits on the terminator; an unterminated last line still counts */
  if (p != in && p[-1] != '\n') {
    count++;
  }

  return count;
}
51
[3abf28b]52
53/* caller must free the return */
[6829afc]54CALLER_OWN char *owl_text_htmlstrip(const char *in)
[ab31454]55{
[e19eb97]56  const char *ptr1, *end, *ptr2, *ptr3;
[65b2173]57  char *out, *out2;
[3abf28b]58
[96828e4]59  out=g_new(char, strlen(in)+30);
[3abf28b]60  strcpy(out, "");
61
62  ptr1=in;
63  end=in+strlen(in);
64 
65  while(ptr1<end) {
66    /* look for an open bracket */
67    ptr2=strchr(ptr1, '<');
68
[dafd919]69    /* if none, copy in from here to end and exit */
[3abf28b]70    if (ptr2==NULL) {
71      strcat(out, ptr1);
[8d24696]72      break;
[3abf28b]73    }
74
75    /* otherwise copy in everything before the open bracket */
76    if (ptr2>ptr1) {
77      strncat(out, ptr1, ptr2-ptr1);
78    }
79
80    /* find the close bracket */
81    ptr3=strchr(ptr2, '>');
82   
83    /* if there is no close, copy as you are and exit */
84    if (!ptr3) {
85      strcat(out, ptr2);
[8d24696]86      break;
[3abf28b]87    }
88
89    /* look for things we know */
[3bcf125]90    if (!strncasecmp(ptr2, "<BODY", 5) ||
[dafd919]91        !strncasecmp(ptr2, "<FONT", 5) ||
[3abf28b]92        !strncasecmp(ptr2, "<HTML", 5) ||
93        !strncasecmp(ptr2, "</FONT", 6) ||
94        !strncasecmp(ptr2, "</HTML", 6) ||
95        !strncasecmp(ptr2, "</BODY", 6)) {
96
97      /* advance to beyond the angle brakcet and go again */
98      ptr1=ptr3+1;
99      continue;
100    }
[75e3879]101    if (!strncasecmp(ptr2, "<BR>", 4)) {
102      strcat(out, "\n");
103      ptr1=ptr3+1;
104      continue;
105    }
[3abf28b]106
107    /* if it wasn't something we know, copy to the > and  go again */
[75e3879]108    strncat(out, ptr2, ptr3-ptr2+1);
[3abf28b]109    ptr1=ptr3+1;
110  }
[8d24696]111
[e3d9c77]112  out2=owl_text_substitute(out, "&lt;", "<");
[ddbbcffa]113  g_free(out);
[e3d9c77]114  out=owl_text_substitute(out2, "&gt;", ">");
[ddbbcffa]115  g_free(out2);
[e3d9c77]116  out2=owl_text_substitute(out, "&amp;", "&");
[ddbbcffa]117  g_free(out);
[e3d9c77]118  out=owl_text_substitute(out2, "&quot;", "\"");
[ddbbcffa]119  g_free(out2);
[e3d9c77]120  out2=owl_text_substitute(out, "&nbsp;", " ");
[ddbbcffa]121  g_free(out);
[e3d9c77]122  out=owl_text_substitute(out2, "&ensp;", "  ");
[ddbbcffa]123  g_free(out2);
[e3d9c77]124  out2=owl_text_substitute(out, "&emsp;", "   ");
[ddbbcffa]125  g_free(out);
[e3d9c77]126  out=owl_text_substitute(out2, "&endash;", "--");
[ddbbcffa]127  g_free(out2);
[e3d9c77]128  out2=owl_text_substitute(out, "&emdash;", "---");
[ddbbcffa]129  g_free(out);
[8d24696]130
131  return(out2);
[3abf28b]132}
[ab31454]133
[3e8ff1e]134/* Caller must free return */
[6829afc]135CALLER_OWN char *owl_text_expand_tabs(const char *in)
[3e8ff1e]136{
[f7cd7c9]137  int len = 0;
[e19eb97]138  const char *p = in;
[3e8ff1e]139  char *ret, *out;
140  int col;
141
[f7cd7c9]142  col = 0;
[3e8ff1e]143  while(*p) {
[f7cd7c9]144    gunichar c = g_utf8_get_char(p);
[72ec874]145    const char *q = g_utf8_next_char(p);
[f7cd7c9]146    switch (c) {
147    case '\t':
148      do { len++; col++; } while (col % OWL_TAB_WIDTH);
149      p = q;
150      continue;
151    case '\n':
152      col = 0;
153      break;
154    default:
155      col += mk_wcwidth(c);
156      break;
157    }
158    len += q - p;
159    p = q;
[3e8ff1e]160  }
161
[96828e4]162  ret = g_new(char, len + 1);
[3e8ff1e]163
164  p = in;
165  out = ret;
166
167  col = 0;
168  while(*p) {
[f7cd7c9]169    gunichar c = g_utf8_get_char(p);
[72ec874]170    const char *q = g_utf8_next_char(p);
[f7cd7c9]171    switch (c) {
[3e8ff1e]172    case '\t':
173      do {*(out++) = ' '; col++; } while (col % OWL_TAB_WIDTH);
[f7cd7c9]174      p = q;
175      continue;
[3e8ff1e]176    case '\n':
[f7cd7c9]177      col = 0;
178      break;
[3e8ff1e]179    default:
[f7cd7c9]180      col += mk_wcwidth(c);
181      break;
[3e8ff1e]182    }
[f7cd7c9]183    memcpy(out, p, q - p);
184    out += q - p;
185    p = q;
[3e8ff1e]186  }
187
188  *out = 0;
189
190  return ret;
191}
192
[ab31454]193/* caller must free the return */
[6829afc]194CALLER_OWN char *owl_text_wordwrap(const char *in, int col)
[ab31454]195{
196  char *out;
197  int cur, lastspace, len, lastnewline;
198
[d4927a7]199  out=g_strdup(in);
[ab31454]200  len=strlen(in);
201  cur=0;
202  lastspace=-1;
203  lastnewline=-1;
204
205  while (cur<(len-1)) {
206    if (out[cur]==' ') {
207      lastspace=cur;
208      cur++;
209      continue;
210    } else if (out[cur]=='\n') {
211      lastnewline=cur;
212      cur++;
213      continue;
214    }
215
216    /* do we need to wrap? */
217    if ( (cur-(lastnewline+1)) > col ) {
218      if (lastspace==-1 ||
219          (lastnewline>0 && (lastspace<=lastnewline))) {
220        /* we can't help, sorry */
221        cur++;
222        continue;
223      }
224
225      /* turn the last space into a newline */
226      out[lastspace]='\n';
227      lastnewline=lastspace;
228      lastspace=-1;
229      cur++;
230      continue;
231    }
232
233    cur++;
234    continue;
235  }
236  return(out);
237}
[e3d9c77]238
/* Joins wrapped lines in place: every single newline that sits
 * strictly inside the string, with a non-newline byte on both sides,
 * is replaced by a space.  Runs of two or more newlines, and a
 * newline at the very start or very end, are left alone.
 *
 * This modifies 'in'. */
void owl_text_wordunwrap(char *in)
{
  char *p;

  /* nothing to do for an empty string */
  if (in[0] == '\0') {
    return;
  }

  /* start at the second byte and stop before the last one, so both
   * neighbours of *p are always real characters of the string */
  for (p = in + 1; p[0] != '\0' && p[1] != '\0'; p++) {
    if (p[0] == '\n' && p[-1] != '\n' && p[1] != '\n') {
      *p = ' ';
    }
  }
}
253
[e3d9c77]254/* return 1 if a string is only whitespace, otherwise 0 */
[e19eb97]255int only_whitespace(const char *s)
[e3d9c77]256{
[28ee32b]257  if (g_utf8_validate(s,-1,NULL)) {
[e19eb97]258    const char *p;
[28ee32b]259    for(p = s; p[0]; p=g_utf8_next_char(p)) {
260      if (!g_unichar_isspace(g_utf8_get_char(p))) return 0;
261    }
262  }
263  else {
264    int i;
265    for (i=0; s[i]; i++) {
266      if (!isspace((int) s[i])) return(0);
267    }
[e3d9c77]268  }
269  return(1);
270}
271
272/* Return a string with any occurances of 'from' replaced with 'to'.
273 * Caller must free returned string.
274 */
[6829afc]275CALLER_OWN char *owl_text_substitute(const char *in, const char *from, const char *to)
[e3d9c77]276{
[42ee1be]277  char **split = g_strsplit(in, from, 0), *out;
278  out = g_strjoinv(to, split);
279  g_strfreev(split);
280  return out;
[e3d9c77]281}
282
283/* Return a string which is like 'in' except that every instance of
284 * any character in 'toquote' found in 'in' is preceeded by the string
285 * 'quotestr'.  For example, owl_text_quote(in, "+*.", "\") would
286 * place a backslash before every '+', '*' or '.' in 'in'.  It is
287 * permissable for a character in 'quotestr' to be in 'toquote'.
288 * On success returns the string, on error returns NULL.
289 */
[6829afc]290CALLER_OWN char *owl_text_quote(const char *in, const char *toquote, const char *quotestr)
[e3d9c77]291{
[72db971]292  int i, x, r, place, escape;
[e3d9c77]293  int in_len, toquote_len, quotestr_len;
294  char *out;
295
296  in_len=strlen(in);
297  toquote_len=strlen(toquote);
298  quotestr_len=strlen(quotestr);
299  place=0;
[72db971]300  escape = 0;
301  for (i=0; i<in_len; i++) {
302    if(strchr(toquote, in[i]) != NULL)
303      escape++;
304  }
[96828e4]305  out = g_new(char, in_len + quotestr_len*escape+1);
[e3d9c77]306  for (i=0; i<in_len; i++) {
307
308    /* check if it's a character that needs quoting */
309    for (x=0; x<toquote_len; x++) {
310      if (in[i]==toquote[x]) {
311        /* quote it */
312        for (r=0; r<quotestr_len; r++) {
313          out[place+r]=quotestr[r];
314        }
315        place+=quotestr_len;
316        break;
317      }
318    }
319
320    /* either way, we now copy over the character */
321    out[place]=in[i];
322    place++;
323  }
324  out[place]='\0';
325  return(out);
326}
Note: See TracBrowser for help on using the repository browser.