source: text.c @ 120dac7

release-1.9
Last change on this file since 120dac7 was f271129, checked in by Jason Gross <jgross@mit.edu>, 10 years ago
Fix up headers The additions to owl.h and some of the removals were done by Alejandro Sedeño <asedeno@mit.edu> in commit 77a0258b3919468fc9d7f7602588ac427ab36e6c. Notes: * I think owl.c lost the need for sys/time.h when we punted select() in favor of glib's main loop. * We don't actually need to include things like stdarg.h, stdio.h, glib/gstdio.h, glib-object.h. I think they get indirectly included via owl.h and/or glib.h. They're left in (or added in to) the files that use functions/types from them. * I'm not entirely sure what sys/socket.h is doing in message.c. It is there from the initial commit. I suspect it might have had something to do with the call to getnameinfo. message.c compiles without it, but http://pubs.opengroup.org/onlinepubs/009695399/functions/getnameinfo.html suggests that we're supposed to include it? *shrugs* I'm leaving it in, for now. (Rather, I'll leave one copy of the #include in.)
  • Property mode set to 100644
File size: 6.8 KB
RevLine 
[7d4fbcd]1#include "owl.h"
2
[4d24650]3/* Returns a copy of 'in' with each line indented 'n'
[ddbbcffa]4 * characters. Result must be freed with g_free. */
[14be3a5]5CALLER_OWN char *owl_text_indent(const char *in, int n, bool indent_first_line)
[ab31454]6{
[e19eb97]7  const char *ptr1, *ptr2, *last;
[4d24650]8  GString *out = g_string_new("");
[7d4fbcd]9  int i;
[14be3a5]10  bool indent_this_line = indent_first_line;
[7d4fbcd]11
12  last=in+strlen(in)-1;
13  ptr1=in;
14  while (ptr1<=last) {
[14be3a5]15    if (indent_this_line) {
16      for (i = 0; i < n; i++) {
17        g_string_append_c(out, ' ');
18      }
[7d4fbcd]19    }
[14be3a5]20    indent_this_line = true;
[7d4fbcd]21    ptr2=strchr(ptr1, '\n');
22    if (!ptr2) {
[4d24650]23      g_string_append(out, ptr1);
[7d4fbcd]24      break;
25    } else {
[4d24650]26      g_string_append_len(out, ptr1, ptr2-ptr1+1);
[7d4fbcd]27    }
28    ptr1=ptr2+1;
29  }
[4d24650]30  return g_string_free(out, false);
[7d4fbcd]31}
32
/* Count the lines in 'in'.  Every '\n' terminates one line, and a
 * non-empty tail that is not followed by '\n' counts as one more.
 * The empty string has zero lines. */
int owl_text_num_lines(const char *in)
{
  const char *p;
  int count = 0;
  /* Start as if a newline had just been seen, so that an empty
   * string does not pick up the "unterminated last line" bonus. */
  char prev = '\n';

  for (p = in; *p != '\0'; p++) {
    if (*p == '\n') {
      count++;
    }
    prev = *p;
  }

  /* a final line without a trailing newline still counts */
  if (prev != '\n') {
    count++;
  }

  return count;
}
47
[3abf28b]48
49/* caller must free the return */
[6829afc]50CALLER_OWN char *owl_text_htmlstrip(const char *in)
[ab31454]51{
[e19eb97]52  const char *ptr1, *end, *ptr2, *ptr3;
[65b2173]53  char *out, *out2;
[3abf28b]54
[96828e4]55  out=g_new(char, strlen(in)+30);
[3abf28b]56  strcpy(out, "");
57
58  ptr1=in;
59  end=in+strlen(in);
60 
61  while(ptr1<end) {
62    /* look for an open bracket */
63    ptr2=strchr(ptr1, '<');
64
[dafd919]65    /* if none, copy in from here to end and exit */
[3abf28b]66    if (ptr2==NULL) {
67      strcat(out, ptr1);
[8d24696]68      break;
[3abf28b]69    }
70
71    /* otherwise copy in everything before the open bracket */
72    if (ptr2>ptr1) {
73      strncat(out, ptr1, ptr2-ptr1);
74    }
75
76    /* find the close bracket */
77    ptr3=strchr(ptr2, '>');
78   
79    /* if there is no close, copy as you are and exit */
80    if (!ptr3) {
81      strcat(out, ptr2);
[8d24696]82      break;
[3abf28b]83    }
84
85    /* look for things we know */
[3bcf125]86    if (!strncasecmp(ptr2, "<BODY", 5) ||
[dafd919]87        !strncasecmp(ptr2, "<FONT", 5) ||
[3abf28b]88        !strncasecmp(ptr2, "<HTML", 5) ||
89        !strncasecmp(ptr2, "</FONT", 6) ||
90        !strncasecmp(ptr2, "</HTML", 6) ||
91        !strncasecmp(ptr2, "</BODY", 6)) {
92
93      /* advance to beyond the angle brakcet and go again */
94      ptr1=ptr3+1;
95      continue;
96    }
[75e3879]97    if (!strncasecmp(ptr2, "<BR>", 4)) {
98      strcat(out, "\n");
99      ptr1=ptr3+1;
100      continue;
101    }
[3abf28b]102
103    /* if it wasn't something we know, copy to the > and  go again */
[75e3879]104    strncat(out, ptr2, ptr3-ptr2+1);
[3abf28b]105    ptr1=ptr3+1;
106  }
[8d24696]107
[e3d9c77]108  out2=owl_text_substitute(out, "&lt;", "<");
[ddbbcffa]109  g_free(out);
[e3d9c77]110  out=owl_text_substitute(out2, "&gt;", ">");
[ddbbcffa]111  g_free(out2);
[e3d9c77]112  out2=owl_text_substitute(out, "&amp;", "&");
[ddbbcffa]113  g_free(out);
[e3d9c77]114  out=owl_text_substitute(out2, "&quot;", "\"");
[ddbbcffa]115  g_free(out2);
[e3d9c77]116  out2=owl_text_substitute(out, "&nbsp;", " ");
[ddbbcffa]117  g_free(out);
[e3d9c77]118  out=owl_text_substitute(out2, "&ensp;", "  ");
[ddbbcffa]119  g_free(out2);
[e3d9c77]120  out2=owl_text_substitute(out, "&emsp;", "   ");
[ddbbcffa]121  g_free(out);
[e3d9c77]122  out=owl_text_substitute(out2, "&endash;", "--");
[ddbbcffa]123  g_free(out2);
[e3d9c77]124  out2=owl_text_substitute(out, "&emdash;", "---");
[ddbbcffa]125  g_free(out);
[8d24696]126
127  return(out2);
[3abf28b]128}
[ab31454]129
[3e8ff1e]130/* Caller must free return */
[6829afc]131CALLER_OWN char *owl_text_expand_tabs(const char *in)
[3e8ff1e]132{
[f7cd7c9]133  int len = 0;
[e19eb97]134  const char *p = in;
[3e8ff1e]135  char *ret, *out;
136  int col;
137
[f7cd7c9]138  col = 0;
[3e8ff1e]139  while(*p) {
[f7cd7c9]140    gunichar c = g_utf8_get_char(p);
[72ec874]141    const char *q = g_utf8_next_char(p);
[f7cd7c9]142    switch (c) {
143    case '\t':
144      do { len++; col++; } while (col % OWL_TAB_WIDTH);
145      p = q;
146      continue;
147    case '\n':
148      col = 0;
149      break;
150    default:
151      col += mk_wcwidth(c);
152      break;
153    }
154    len += q - p;
155    p = q;
[3e8ff1e]156  }
157
[96828e4]158  ret = g_new(char, len + 1);
[3e8ff1e]159
160  p = in;
161  out = ret;
162
163  col = 0;
164  while(*p) {
[f7cd7c9]165    gunichar c = g_utf8_get_char(p);
[72ec874]166    const char *q = g_utf8_next_char(p);
[f7cd7c9]167    switch (c) {
[3e8ff1e]168    case '\t':
169      do {*(out++) = ' '; col++; } while (col % OWL_TAB_WIDTH);
[f7cd7c9]170      p = q;
171      continue;
[3e8ff1e]172    case '\n':
[f7cd7c9]173      col = 0;
174      break;
[3e8ff1e]175    default:
[f7cd7c9]176      col += mk_wcwidth(c);
177      break;
[3e8ff1e]178    }
[f7cd7c9]179    memcpy(out, p, q - p);
180    out += q - p;
181    p = q;
[3e8ff1e]182  }
183
184  *out = 0;
185
186  return ret;
187}
188
[ab31454]189/* caller must free the return */
[6829afc]190CALLER_OWN char *owl_text_wordwrap(const char *in, int col)
[ab31454]191{
192  char *out;
193  int cur, lastspace, len, lastnewline;
194
[d4927a7]195  out=g_strdup(in);
[ab31454]196  len=strlen(in);
197  cur=0;
198  lastspace=-1;
199  lastnewline=-1;
200
201  while (cur<(len-1)) {
202    if (out[cur]==' ') {
203      lastspace=cur;
204      cur++;
205      continue;
206    } else if (out[cur]=='\n') {
207      lastnewline=cur;
208      cur++;
209      continue;
210    }
211
212    /* do we need to wrap? */
213    if ( (cur-(lastnewline+1)) > col ) {
214      if (lastspace==-1 ||
215          (lastnewline>0 && (lastspace<=lastnewline))) {
216        /* we can't help, sorry */
217        cur++;
218        continue;
219      }
220
221      /* turn the last space into a newline */
222      out[lastspace]='\n';
223      lastnewline=lastspace;
224      lastspace=-1;
225      cur++;
226      continue;
227    }
228
229    cur++;
230    continue;
231  }
232  return(out);
233}
[e3d9c77]234
/* Undo single line breaks in place: this modifies 'in'.
 * A newline is replaced by a space when it is neither the first nor
 * the last character and neither neighbour is a newline.  Runs of two
 * or more newlines are therefore left intact. */
void owl_text_wordunwrap(char *in)
{
  int len = strlen(in);
  int pos;

  /* the first and last characters are never candidates */
  for (pos = 1; pos < len - 1; pos++) {
    if (in[pos] != '\n') {
      continue;
    }
    if (in[pos - 1] == '\n' || in[pos + 1] == '\n') {
      continue;
    }
    in[pos] = ' ';
  }
}
249
[e3d9c77]250/* return 1 if a string is only whitespace, otherwise 0 */
[e19eb97]251int only_whitespace(const char *s)
[e3d9c77]252{
[28ee32b]253  if (g_utf8_validate(s,-1,NULL)) {
[e19eb97]254    const char *p;
[28ee32b]255    for(p = s; p[0]; p=g_utf8_next_char(p)) {
256      if (!g_unichar_isspace(g_utf8_get_char(p))) return 0;
257    }
258  }
259  else {
260    int i;
261    for (i=0; s[i]; i++) {
262      if (!isspace((int) s[i])) return(0);
263    }
[e3d9c77]264  }
265  return(1);
266}
267
268/* Return a string with any occurances of 'from' replaced with 'to'.
269 * Caller must free returned string.
270 */
[6829afc]271CALLER_OWN char *owl_text_substitute(const char *in, const char *from, const char *to)
[e3d9c77]272{
[42ee1be]273  char **split = g_strsplit(in, from, 0), *out;
274  out = g_strjoinv(to, split);
275  g_strfreev(split);
276  return out;
[e3d9c77]277}
278
279/* Return a string which is like 'in' except that every instance of
280 * any character in 'toquote' found in 'in' is preceeded by the string
281 * 'quotestr'.  For example, owl_text_quote(in, "+*.", "\") would
282 * place a backslash before every '+', '*' or '.' in 'in'.  It is
283 * permissable for a character in 'quotestr' to be in 'toquote'.
284 * On success returns the string, on error returns NULL.
285 */
[6829afc]286CALLER_OWN char *owl_text_quote(const char *in, const char *toquote, const char *quotestr)
[e3d9c77]287{
[72db971]288  int i, x, r, place, escape;
[e3d9c77]289  int in_len, toquote_len, quotestr_len;
290  char *out;
291
292  in_len=strlen(in);
293  toquote_len=strlen(toquote);
294  quotestr_len=strlen(quotestr);
295  place=0;
[72db971]296  escape = 0;
297  for (i=0; i<in_len; i++) {
298    if(strchr(toquote, in[i]) != NULL)
299      escape++;
300  }
[96828e4]301  out = g_new(char, in_len + quotestr_len*escape+1);
[e3d9c77]302  for (i=0; i<in_len; i++) {
303
304    /* check if it's a character that needs quoting */
305    for (x=0; x<toquote_len; x++) {
306      if (in[i]==toquote[x]) {
307        /* quote it */
308        for (r=0; r<quotestr_len; r++) {
309          out[place+r]=quotestr[r];
310        }
311        place+=quotestr_len;
312        break;
313      }
314    }
315
316    /* either way, we now copy over the character */
317    out[place]=in[i];
318    place++;
319  }
320  out[place]='\0';
321  return(out);
322}
Note: See TracBrowser for help on using the repository browser.