Context Navigation

source: text.c @ f60b417

Visit:

Last change on this file since f60b417 was 4d4f0ba, checked in by Jason Gross <jgross@mit.edu>, 13 years ago
Give owl_text_indent an indent_first_line parameter. This is useful if you need to indent text that isn't broken into chunks that end with newlines.
Property mode set to `100644`
File size: 6.8 KB

Line
1	#include <stdio.h>
2	#include <string.h>
3	#include <stdlib.h>
4	#include <ctype.h>
5	#include "owl.h"
6
7	/* Returns a copy of 'in' with each line indented 'n'
8	* characters. Result must be freed with g_free. */
9	CALLER_OWN char owl_text_indent(const char in, int n, bool indent_first_line)
10	{
11	const char ptr1, ptr2, *last;
12	GString *out = g_string_new("");
13	int i;
14	bool indent_this_line = indent_first_line;
15
16	last=in+strlen(in)-1;
17	ptr1=in;
18	while (ptr1<=last) {
19	if (indent_this_line) {
20	for (i = 0; i < n; i++) {
21	g_string_append_c(out, ' ');
22	}
23	}
24	indent_this_line = true;
25	ptr2=strchr(ptr1, '\n');
26	if (!ptr2) {
27	g_string_append(out, ptr1);
28	break;
29	} else {
30	g_string_append_len(out, ptr1, ptr2-ptr1+1);
31	}
32	ptr1=ptr2+1;
33	}
34	return g_string_free(out, false);
35	}
36
/* Returns the number of lines in 'in'.
 *
 * Every '\n' terminates one line; a non-empty final segment that is not
 * terminated by '\n' counts as one more line.  The empty string has 0 lines.
 */
int owl_text_num_lines(const char *in)
{
  int lines = 0;
  int i;

  for (i = 0; in[i] != '\0'; i++) {
    if (in[i] == '\n') {
      lines++;
    }
  }

  /* If the last char wasn't a '\n' there's one more (unterminated) line. */
  if (i > 0 && in[i - 1] != '\n') {
    lines++;
  }

  return lines;
}
51
52
53	/* caller must free the return */
54	CALLER_OWN char owl_text_htmlstrip(const char in)
55	{
56	const char ptr1, end, ptr2, ptr3;
57	char out, out2;
58
59	out=g_new(char, strlen(in)+30);
60	strcpy(out, "");
61
62	ptr1=in;
63	end=in+strlen(in);
64
65	while(ptr1<end) {
66	/* look for an open bracket */
67	ptr2=strchr(ptr1, '<');
68
69	/* if none, copy in from here to end and exit */
70	if (ptr2==NULL) {
71	strcat(out, ptr1);
72	break;
73	}
74
75	/* otherwise copy in everything before the open bracket */
76	if (ptr2>ptr1) {
77	strncat(out, ptr1, ptr2-ptr1);
78	}
79
80	/* find the close bracket */
81	ptr3=strchr(ptr2, '>');
82
83	/* if there is no close, copy as you are and exit */
84	if (!ptr3) {
85	strcat(out, ptr2);
86	break;
87	}
88
89	/* look for things we know */
90	if (!strncasecmp(ptr2, "<BODY", 5) \|\|
91	!strncasecmp(ptr2, "<FONT", 5) \|\|
92	!strncasecmp(ptr2, "<HTML", 5) \|\|
93	!strncasecmp(ptr2, "</FONT", 6) \|\|
94	!strncasecmp(ptr2, "</HTML", 6) \|\|
95	!strncasecmp(ptr2, "</BODY", 6)) {
96
97	/* advance to beyond the angle brakcet and go again */
98	ptr1=ptr3+1;
99	continue;
100	}
101	if (!strncasecmp(ptr2, "<BR>", 4)) {
102	strcat(out, "\n");
103	ptr1=ptr3+1;
104	continue;
105	}
106
107	/* if it wasn't something we know, copy to the > and go again */
108	strncat(out, ptr2, ptr3-ptr2+1);
109	ptr1=ptr3+1;
110	}
111
112	out2=owl_text_substitute(out, "<", "<");
113	g_free(out);
114	out=owl_text_substitute(out2, ">", ">");
115	g_free(out2);
116	out2=owl_text_substitute(out, "&", "&");
117	g_free(out);
118	out=owl_text_substitute(out2, """, "\"");
119	g_free(out2);
120	out2=owl_text_substitute(out, " ", " ");
121	g_free(out);
122	out=owl_text_substitute(out2, "&ensp;", " ");
123	g_free(out2);
124	out2=owl_text_substitute(out, "&emsp;", " ");
125	g_free(out);
126	out=owl_text_substitute(out2, "&endash;", "--");
127	g_free(out2);
128	out2=owl_text_substitute(out, "&emdash;", "---");
129	g_free(out);
130
131	return(out2);
132	}
133
134	/* Caller must free return */
135	CALLER_OWN char owl_text_expand_tabs(const char in)
136	{
137	int len = 0;
138	const char *p = in;
139	char ret, out;
140	int col;
141
142	col = 0;
143	while(*p) {
144	gunichar c = g_utf8_get_char(p);
145	const char *q = g_utf8_next_char(p);
146	switch (c) {
147	case '\t':
148	do { len++; col++; } while (col % OWL_TAB_WIDTH);
149	p = q;
150	continue;
151	case '\n':
152	col = 0;
153	break;
154	default:
155	col += mk_wcwidth(c);
156	break;
157	}
158	len += q - p;
159	p = q;
160	}
161
162	ret = g_new(char, len + 1);
163
164	p = in;
165	out = ret;
166
167	col = 0;
168	while(*p) {
169	gunichar c = g_utf8_get_char(p);
170	const char *q = g_utf8_next_char(p);
171	switch (c) {
172	case '\t':
173	do {*(out++) = ' '; col++; } while (col % OWL_TAB_WIDTH);
174	p = q;
175	continue;
176	case '\n':
177	col = 0;
178	break;
179	default:
180	col += mk_wcwidth(c);
181	break;
182	}
183	memcpy(out, p, q - p);
184	out += q - p;
185	p = q;
186	}
187
188	*out = 0;
189
190	return ret;
191	}
192
193	/* caller must free the return */
194	CALLER_OWN char owl_text_wordwrap(const char in, int col)
195	{
196	char *out;
197	int cur, lastspace, len, lastnewline;
198
199	out=g_strdup(in);
200	len=strlen(in);
201	cur=0;
202	lastspace=-1;
203	lastnewline=-1;
204
205	while (cur<(len-1)) {
206	if (out[cur]==' ') {
207	lastspace=cur;
208	cur++;
209	continue;
210	} else if (out[cur]=='\n') {
211	lastnewline=cur;
212	cur++;
213	continue;
214	}
215
216	/* do we need to wrap? */
217	if ( (cur-(lastnewline+1)) > col ) {
218	if (lastspace==-1 \|\|
219	(lastnewline>0 && (lastspace<=lastnewline))) {
220	/* we can't help, sorry */
221	cur++;
222	continue;
223	}
224
225	/* turn the last space into a newline */
226	out[lastspace]='\n';
227	lastnewline=lastspace;
228	lastspace=-1;
229	cur++;
230	continue;
231	}
232
233	cur++;
234	continue;
235	}
236	return(out);
237	}
238
/* Joins wrapped lines in place: this modifies 'in'.
 *
 * A newline is replaced by a space when it is neither the first nor the
 * last character and neither neighbour is a newline.  Runs of two or more
 * newlines (paragraph breaks) and a leading or trailing newline are left
 * untouched.
 */
void owl_text_wordunwrap(char *in)
{
  size_t len = strlen(in);
  size_t i;

  /* Only interior positions qualify; i + 1 < len also covers len == 0
   * without unsigned underflow. */
  for (i = 1; i + 1 < len; i++) {
    if (in[i] == '\n' && in[i - 1] != '\n' && in[i + 1] != '\n') {
      in[i] = ' ';
    }
  }
}
253
254	/* return 1 if a string is only whitespace, otherwise 0 */
255	int only_whitespace(const char *s)
256	{
257	if (g_utf8_validate(s,-1,NULL)) {
258	const char *p;
259	for(p = s; p[0]; p=g_utf8_next_char(p)) {
260	if (!g_unichar_isspace(g_utf8_get_char(p))) return 0;
261	}
262	}
263	else {
264	int i;
265	for (i=0; s[i]; i++) {
266	if (!isspace((int) s[i])) return(0);
267	}
268	}
269	return(1);
270	}
271
272	/* Return a string with any occurances of 'from' replaced with 'to'.
273	* Caller must free returned string.
274	*/
275	CALLER_OWN char owl_text_substitute(const char in, const char from, const char to)
276	{
277	char *split = g_strsplit(in, from, 0), out;
278	out = g_strjoinv(to, split);
279	g_strfreev(split);
280	return out;
281	}
282
283	/* Return a string which is like 'in' except that every instance of
284	* any character in 'toquote' found in 'in' is preceeded by the string
285	* 'quotestr'. For example, owl_text_quote(in, "+*.", "\") would
286	* place a backslash before every '+', '*' or '.' in 'in'. It is
287	* permissable for a character in 'quotestr' to be in 'toquote'.
288	* On success returns the string, on error returns NULL.
289	*/
290	CALLER_OWN char owl_text_quote(const char in, const char toquote, const char quotestr)
291	{
292	int i, x, r, place, escape;
293	int in_len, toquote_len, quotestr_len;
294	char *out;
295
296	in_len=strlen(in);
297	toquote_len=strlen(toquote);
298	quotestr_len=strlen(quotestr);
299	place=0;
300	escape = 0;
301	for (i=0; i<in_len; i++) {
302	if(strchr(toquote, in[i]) != NULL)
303	escape++;
304	}
305	out = g_new(char, in_len + quotestr_len*escape+1);
306	for (i=0; i<in_len; i++) {
307
308	/* check if it's a character that needs quoting */
309	for (x=0; x<toquote_len; x++) {
310	if (in[i]==toquote[x]) {
311	/* quote it */
312	for (r=0; r<quotestr_len; r++) {
313	out[place+r]=quotestr[r];
314	}
315	place+=quotestr_len;
316	break;
317	}
318	}
319
320	/* either way, we now copy over the character */
321	out[place]=in[i];
322	place++;
323	}
324	out[place]='\0';
325	return(out);
326	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: