source: text.c @ 28ee32b

Branches: barnowl_perlaim, debian, release-1.4, release-1.5, release-1.6, release-1.7, release-1.8, release-1.9
Last change on this file since 28ee32b was 28ee32b, checked in by Alejandro R. Sedeño <asedeno@mit.edu>, 13 years ago
UTF-8 - first pass unicode changes: * remove downstr() from text.c, replace on site with calls to g_utf8_strdown. In-place downcasing is not a good idea, so the downstr() contract is unfulfillable. * make owl_text_truncate_cols() and owl_fmtext_truncate_cols() understand character width. This may need more work. Some code duplication - see if we can refactor. * stristr() rewritten to use g_utf8_casefold() instead of downstr(), and restructured to have a single return. * only_whitespace() rewritten for unicode. glib changes: * rewrite owl_sprintf() in terms of g_strdup_vprintf() WARNING: THIS IS NOT SAFE YET. Network data is not yet sanitized. Non UTF-8 inputs may do horrible things to you. This phase is just working on rendering.
  • Property mode set to 100644
File size: 9.2 KB
Line 
1#include <stdio.h>
2#include <string.h>
3#include <stdlib.h>
4#include <ctype.h>
5#include "owl.h"
6
/* File identification string.  "$Id$" looks like a version-control
 * keyword placeholder (presumably expanded at checkout; confirm
 * against the repository configuration).  Not referenced elsewhere in
 * this file.
 */
static const char fileIdent[] = "$Id$";
8
/* Copy up to 'lines' lines of 'in' into 'out', beginning with line
 * 'aline'.  Lines are numbered from 1; an 'aline' of 0 is treated as 1.
 * Every copied line keeps its trailing newline.  'out' must not overlap
 * 'in' and must have room for the result (strlen(in)+1 bytes is always
 * enough).
 *
 * Returns 0 when 'lines' newline-terminated lines were copied.
 * Returns -1 when 'in' has no line 'aline', when 'lines' is less than 1
 * (in both cases 'out' is the empty string), or when the input ran out
 * before 'lines' lines were found (in which case 'out' holds the text
 * that was available).
 */
int owl_text_truncate_lines(char *out, char *in, int aline, int lines)
{
  char *src = in;
  char *dst = out;
  char *eol;
  size_t span;
  int skip, copied;

  *dst = '\0';

  if (aline == 0) aline = 1; /* really illegal use */

  /* step over the lines that precede the requested starting line */
  for (skip = aline - 1; skip > 0; skip--) {
    eol = strchr(src, '\n');
    if (eol == NULL) return(-1);
    src = eol + 1;
  }

  if (lines < 1) return(-1);

  for (copied = 0; copied < lines; copied++) {
    eol = strchr(src, '\n');
    if (eol == NULL) {
      /* unterminated tail: hand back what is left and report it short */
      strcpy(dst, src);
      return(-1);
    }
    span = (size_t)(eol - src) + 1;   /* the line plus its newline */
    memcpy(dst, src, span);
    dst += span;
    *dst = '\0';
    src = eol + 1;
  }
  return(0);
}
46
47 
48/* the first column is column 0.  The message is expected to end in a
49 * new line for now */
50void owl_text_truncate_cols(char *out, char *in, int acol, int bcol)
51{
52  char *ptr_s, *ptr_e, *ptr_c, *tmpbuff, *last;
53  int col, cnt;
54 
55  tmpbuff=owl_malloc(strlen(in)+20);
56
57  strcpy(tmpbuff, "");
58  last=in+strlen(in)-1;
59  ptr_s=in;
60  while (ptr_s<last) {
61    ptr_e=strchr(ptr_s, '\n');
62    if (!ptr_e) {
63      /* but this shouldn't happen if we end in a \n */
64      break;
65    }
66   
67    if (ptr_e==ptr_s) {
68      strcat(tmpbuff, "\n");
69      ptr_s++;
70      continue;
71    }
72
73    col = 0;
74    cnt = 0;
75    ptr_c = ptr_s;
76    while(col < bcol && ptr_c < ptr_e) {
77      gunichar c = g_utf8_get_char(ptr_c);
78      if (g_unichar_iswide(c)) {
79        if (col + 2 > bcol) break;
80        else col += 2;
81      }
82      else if (g_unichar_type(c) == G_UNICODE_NON_SPACING_MARK) ; /*do nothing*/
83      /* We may need more special cases here... unicode spacing is hard. */
84      else {
85        if (col + 1 > bcol) break;
86        else ++col;
87      }
88      ptr_c = g_utf8_next_char(ptr_c);
89      if (col >= acol) ++cnt;
90      if (col <= acol) ptr_s = ptr_c;
91    }
92    strncat(tmpbuff, ptr_s, ptr_c - ptr_s - 1);
93    strcat(tmpbuff, "\n");
94    ptr_s = ptr_e + 1;
95#if 0
96    /* we need to check that we won't run over here */
97    if ( (ptr_e-ptr_s) < (bcol-acol) ) {
98      len=ptr_e-(ptr_s+acol);
99    } else {
100      len=bcol-acol;
101    }
102    if ((ptr_s+len)>=last) {
103      len-=last-(ptr_s+len);
104    }
105
106    strncat(tmpbuff, ptr_s+acol, len);
107    strcat(tmpbuff, "\n");
108
109    ptr_s=ptr_e+1;
110#endif
111  }
112  strcpy(out, tmpbuff);
113  owl_free(tmpbuff);
114}
115
116
/* Write 'in' to 'out' with 'n' spaces placed in front of every line.
 * Newlines are preserved, and a final line without a newline is
 * indented as well.  An empty 'in' yields an empty 'out'; an 'n' of
 * zero or less adds no spaces.  'out' must not overlap 'in' and must be
 * large enough for the indented text.
 */
void owl_text_indent(char *out, char *in, int n)
{
  char *src = in;
  char *dst = out;
  char *eol;
  size_t span;
  int pad;

  *dst = '\0';

  while (*src != '\0') {
    for (pad = 0; pad < n; pad++) {
      *dst++ = ' ';
    }

    eol = strchr(src, '\n');
    if (eol == NULL) {
      /* last line has no newline: copy the remainder and finish */
      strcpy(dst, src);
      return;
    }

    span = (size_t)(eol - src) + 1;   /* the line plus its newline */
    memcpy(dst, src, span);
    dst += span;
    *dst = '\0';
    src = eol + 1;
  }
}
140
/* Return the number of lines in 'in': one per newline, plus one more
 * when the text is non-empty and does not end in a newline.  The empty
 * string has zero lines.
 */
int owl_text_num_lines(char *in)
{
  const char *p;
  int count = 0;

  for (p = in; *p != '\0'; p++) {
    if (*p == '\n') count++;
  }

  /* an unterminated final line still counts */
  if (p != in && p[-1] != '\n') count++;

  return(count);
}
155
156
/* Return a copy of 'in' with a small set of HTML constructs removed or
 * translated:
 *   - tags starting with "<BODY " (note the space), "<FONT", "<HTML",
 *     "</FONT", "</HTML" or "</BODY" are dropped (case-insensitive);
 *   - "<BR>" becomes a newline;
 *   - any other tag is copied through unchanged;
 *   - a '<' with no matching '>' and everything after it is copied as is;
 *   - the entities listed in the table below are decoded.
 *
 * caller must free the return
 */
char *owl_text_htmlstrip(char *in)
{
  /* Entities are decoded one pass at a time, in this order.  "&amp;"
   * has to be last: if it ran earlier, text such as "&amp;quot;" would
   * first turn into "&quot;" and then be decoded a second time.
   */
  static const struct {
    char *entity;
    char *text;
  } entities[] = {
    { "&lt;",     "<"   },
    { "&gt;",     ">"   },
    { "&quot;",   "\""  },
    { "&nbsp;",   " "   },
    { "&ensp;",   "  "  },
    { "&emsp;",   "   " },
    { "&endash;", "--"  },
    { "&emdash;", "---" },
    { "&amp;",    "&"   }
  };
  char *src, *open, *close, *out, *dst, *decoded;
  size_t len, i;

  /* the stripped text is never longer than the input */
  out=owl_malloc(strlen(in)+30);
  dst=out;

  src=in;
  while (*src) {
    /* look for an open bracket */
    open=strchr(src, '<');

    /* if none, copy in from here to end and exit */
    if (open==NULL) {
      len=strlen(src);
      memcpy(dst, src, len);
      dst+=len;
      break;
    }

    /* otherwise copy in everything before the open bracket */
    len=(size_t)(open-src);
    memcpy(dst, src, len);
    dst+=len;

    /* find the close bracket */
    close=strchr(open, '>');

    /* if there is no close, copy as you are and exit */
    if (!close) {
      len=strlen(open);
      memcpy(dst, open, len);
      dst+=len;
      break;
    }

    /* the next round always resumes just beyond the angle bracket */
    src=close+1;

    /* tags we know and simply drop */
    if (!strncasecmp(open, "<BODY ", 6) ||
        !strncasecmp(open, "<FONT", 5) ||
        !strncasecmp(open, "<HTML", 5) ||
        !strncasecmp(open, "</FONT", 6) ||
        !strncasecmp(open, "</HTML", 6) ||
        !strncasecmp(open, "</BODY", 6)) {
      continue;
    }

    if (!strncasecmp(open, "<BR>", 4)) {
      *dst++='\n';
      continue;
    }

    /* if it wasn't something we know, copy through the > and go again */
    len=(size_t)(close-open)+1;
    memcpy(dst, open, len);
    dst+=len;
  }
  *dst='\0';

  /* each pass returns a fresh string, so release the previous one */
  for (i=0; i<sizeof(entities)/sizeof(entities[0]); i++) {
    decoded=owl_text_substitute(out, entities[i].entity, entities[i].text);
    owl_free(out);
    out=decoded;
  }

  return(out);
}
236
/* Return a copy of 'in' with some spaces replaced by newlines.  The
 * text is scanned left to right; whenever a character that is neither
 * a space nor a newline sits more than 'col' bytes past the start of
 * its line, the most recent space seen on that line becomes a newline.
 * A line with no usable space is left over-long.  The last character
 * of the text is never examined.  Positions are counted in bytes.
 *
 * caller must free the return
 */
char *owl_text_wordwrap(char *in, int col)
{
  char *out;
  int pos, len;
  int last_space = -1;
  int last_newline = -1;

  out = owl_strdup(in);
  len = strlen(in);

  for (pos = 0; pos < len - 1; pos++) {
    char ch = out[pos];

    if (ch == ' ') {
      last_space = pos;
      continue;
    }
    if (ch == '\n') {
      last_newline = pos;
      continue;
    }

    /* still within the allowed width? then nothing to do */
    if (pos - (last_newline + 1) <= col) continue;

    /* we can't help if no space has been seen on the current line */
    if (last_space == -1) continue;
    if (last_newline > 0 && last_space <= last_newline) continue;

    /* turn the last space into a newline */
    out[last_space] = '\n';
    last_newline = last_space;
    last_space = -1;
  }
  return(out);
}
282
/* Replace every lone newline in 'in' with a space.  A newline is left
 * untouched when it is the first or last character of the string, or
 * when it has another newline directly before or after it.
 *
 * this modifies 'in'
 */
void owl_text_wordunwrap(char *in)
{
  size_t len = strlen(in);
  size_t pos;

  /* only interior positions can qualify, so start at 1 and stop short
   * of the final character */
  for (pos = 1; pos + 1 < len; pos++) {
    if (in[pos] != '\n') continue;
    if (in[pos - 1] == '\n' || in[pos + 1] == '\n') continue;
    in[pos] = ' ';
  }
}
297
/* Case-insensitive strstr: both strings are case-folded with
 * g_utf8_casefold(), 'b' is searched for in the folded 'a', and the
 * byte offset of the hit is applied to the original 'a'.  Returns a
 * pointer into 'a', or NULL when there is no match or folding fails.
 *
 * NOTE(review): the returned pointer is only right if folding leaves
 * the byte length of the text before the match unchanged -- confirm
 * for inputs whose folded form has a different length.
 */
char *stristr(char *a, char *b)
{
  char *folded_a, *folded_b, *hit;
  char *found = NULL;

  folded_a = g_utf8_casefold(a, -1);
  if (folded_a == NULL) return(NULL);

  folded_b = g_utf8_casefold(b, -1);
  if (folded_b != NULL) {
    hit = strstr(folded_a, folded_b);
    if (hit != NULL) {
      /* translate the position in the folded copy back onto 'a' */
      found = a + (hit - folded_a);
    }
    g_free(folded_b);
  }

  g_free(folded_a);
  return(found);
}
315
316/* return 1 if a string is only whitespace, otherwise 0 */
317int only_whitespace(char *s)
318{
319  if (g_utf8_validate(s,-1,NULL)) {
320    char *p;
321    for(p = s; p[0]; p=g_utf8_next_char(p)) {
322      if (!g_unichar_isspace(g_utf8_get_char(p))) return 0;
323    }
324  }
325  else {
326    int i;
327    for (i=0; s[i]; i++) {
328      if (!isspace((int) s[i])) return(0);
329    }
330  }
331  return(1);
332}
333
/* Pick a quote string for 'line', returned as a string constant that
 * must not be freed:
 *   - "'"  for the empty string;
 *   - "\"" when the line contains a single quote;
 *   - "'"  when it contains a double quote or a space;
 *   - ""   otherwise.
 * A single quote takes precedence when both kinds of quote appear.
 */
char *owl_getquoting(char *line)
{
  char *quote;

  if (*line == '\0') {
    quote = "'";
  } else if (strchr(line, '\'') != NULL) {
    quote = "\"";
  } else if (strchr(line, '"') != NULL || strchr(line, ' ') != NULL) {
    quote = "'";
  } else {
    quote = "";
  }
  return(quote);
}
342
/* Return a copy of 'in' in which every occurrence of 'from' has been
 * replaced by 'to'.  Matches are found left to right and do not
 * overlap; the inserted text is not rescanned.  An empty 'from' yields
 * a plain copy of 'in'.
 * Does not currently handle backslash quoting, but may in the future.
 * Caller must free returned string.
 */
char *owl_text_substitute(char *in, char *from, char *to)
{
  char *result, *match;
  char *src = in;
  int capacity, from_len, to_len, gap;
  int written = 0;

  if (from[0] == '\0') return owl_strdup(in);

  capacity = strlen(in) + 1;
  to_len   = strlen(to);
  from_len = strlen(from);
  result = owl_malloc(capacity);

  while ((match = strstr(src, from)) != NULL) {
    /* carry over the untouched text in front of the match */
    gap = match - src;
    memcpy(result + written, src, gap);
    written += gap;

    /* make room for the replacement, then drop it in */
    capacity += to_len;
    result = owl_realloc(result, capacity);
    memcpy(result + written, to, to_len);
    written += to_len;

    src = match + from_len;
  }

  /* whatever follows the last match, terminator included */
  strcpy(result + written, src);
  return(result);
}
375
/* Replace, in place, every occurrence of the character 'a' in 'buff'
 * with the character 'b'.  'buff' must be null terminated.  The buffer
 * is passed to owl_function_debugmsg() before and after the change.
 */
void owl_text_tr(char *buff, char a, char b)
{
  char *p;

  owl_function_debugmsg("In: %s", buff);
  for (p = buff; *p != '\0'; p++) {
    if (*p == a) *p = b;
  }
  owl_function_debugmsg("Out: %s", buff);
}
389
/* Return a string which is like 'in' except that every instance of
 * any character in 'toquote' found in 'in' is preceded by the string
 * 'quotestr'.  For example, owl_text_quote(in, "+*.", "\") would
 * place a backslash before every '+', '*' or '.' in 'in'.  It is
 * permissible for a character in 'quotestr' to be in 'toquote'.
 * The result is allocated with owl_malloc(); the caller must free it.
 */
char *owl_text_quote(char *in, char *toquote, char *quotestr)
{
  size_t i, place, escape;
  size_t in_len, quotestr_len;
  char *out;

  in_len=strlen(in);
  quotestr_len=strlen(quotestr);

  /* count the characters that need quoting so that the buffer can be
   * sized exactly and allocated once */
  escape=0;
  for (i=0; i<in_len; i++) {
    if (strchr(toquote, in[i]) != NULL) escape++;
  }
  out=owl_malloc(in_len + quotestr_len*escape + 1);

  place=0;
  for (i=0; i<in_len; i++) {
    /* check if it's a character that needs quoting */
    if (strchr(toquote, in[i]) != NULL) {
      memcpy(out+place, quotestr, quotestr_len);
      place+=quotestr_len;
    }

    /* either way, we now copy over the character */
    out[place++]=in[i];
  }
  out[place]='\0';
  return(out);
}
Note: See TracBrowser for help on using the repository browser.