Context Navigation

source: text.c @ 4ebbfbc

Visit:

release-1.10, release-1.9

Last change on this file since 4ebbfbc was 14be3a5, checked in by Jason Gross <jgross@mit.edu>, 13 years ago
Give owl_text_indent an indent_first_line parameter. This is useful if you need to indent text that isn't broken into chunks that end with newlines. This is primarily in preparation for the next commit, where we need to replace the indent on the first line with a prefix. The other (reasonable) option is to make owl_text_indent always prefix the string it's given with an indent, even when it's given the empty string. That would break the nice property that indent(A + B) = indent(A) + indent(B) whenever A ends with a newline. After some discussion on zephyr and on the GitHub pull request, I decided to go with the indent_first_line parameter.
Property mode set to `100644`
File size: 6.8 KB

Rev	Line
[7d4fbcd]	1	#include <stdio.h>
	2	#include <string.h>
	3	#include <stdlib.h>
[995eb4b]	4	#include <ctype.h>
[7d4fbcd]	5	#include "owl.h"
	6
[4d24650]	7	/* Returns a copy of 'in' with each line indented 'n'
[ddbbcffa]	8	* characters. Result must be freed with g_free. */
[14be3a5]	9	CALLER_OWN char owl_text_indent(const char in, int n, bool indent_first_line)
[ab31454]	10	{
[e19eb97]	11	const char ptr1, ptr2, *last;
[4d24650]	12	GString *out = g_string_new("");
[7d4fbcd]	13	int i;
[14be3a5]	14	bool indent_this_line = indent_first_line;
[7d4fbcd]	15
	16	last=in+strlen(in)-1;
	17	ptr1=in;
	18	while (ptr1<=last) {
[14be3a5]	19	if (indent_this_line) {
	20	for (i = 0; i < n; i++) {
	21	g_string_append_c(out, ' ');
	22	}
[7d4fbcd]	23	}
[14be3a5]	24	indent_this_line = true;
[7d4fbcd]	25	ptr2=strchr(ptr1, '\n');
	26	if (!ptr2) {
[4d24650]	27	g_string_append(out, ptr1);
[7d4fbcd]	28	break;
	29	} else {
[4d24650]	30	g_string_append_len(out, ptr1, ptr2-ptr1+1);
[7d4fbcd]	31	}
	32	ptr1=ptr2+1;
	33	}
[4d24650]	34	return g_string_free(out, false);
[7d4fbcd]	35	}
	36
/* Returns the number of lines in 'in'.
 *
 * Each '\n' ends a line; if the text is non-empty and does not end
 * with '\n', the trailing partial line is counted as well.  An empty
 * string has zero lines.
 */
int owl_text_num_lines(const char *in)
{
  int lines = 0;
  int i;

  for (i = 0; in[i] != '\0'; i++) {
    if (in[i] == '\n') {
      lines++;
    }
  }

  /* if the last char wasn't a \n there's one more line */
  if (i > 0 && in[i - 1] != '\n') {
    lines++;
  }

  return lines;
}
	51
[3abf28b]	52
	53	/* caller must free the return */
[6829afc]	54	CALLER_OWN char owl_text_htmlstrip(const char in)
[ab31454]	55	{
[e19eb97]	56	const char ptr1, end, ptr2, ptr3;
[65b2173]	57	char out, out2;
[3abf28b]	58
[96828e4]	59	out=g_new(char, strlen(in)+30);
[3abf28b]	60	strcpy(out, "");
	61
	62	ptr1=in;
	63	end=in+strlen(in);
	64
	65	while(ptr1<end) {
	66	/* look for an open bracket */
	67	ptr2=strchr(ptr1, '<');
	68
[dafd919]	69	/* if none, copy in from here to end and exit */
[3abf28b]	70	if (ptr2==NULL) {
	71	strcat(out, ptr1);
[8d24696]	72	break;
[3abf28b]	73	}
	74
	75	/* otherwise copy in everything before the open bracket */
	76	if (ptr2>ptr1) {
	77	strncat(out, ptr1, ptr2-ptr1);
	78	}
	79
	80	/* find the close bracket */
	81	ptr3=strchr(ptr2, '>');
	82
	83	/* if there is no close, copy as you are and exit */
	84	if (!ptr3) {
	85	strcat(out, ptr2);
[8d24696]	86	break;
[3abf28b]	87	}
	88
	89	/* look for things we know */
[3bcf125]	90	if (!strncasecmp(ptr2, "<BODY", 5) \|\|
[dafd919]	91	!strncasecmp(ptr2, "<FONT", 5) \|\|
[3abf28b]	92	!strncasecmp(ptr2, "<HTML", 5) \|\|
	93	!strncasecmp(ptr2, "</FONT", 6) \|\|
	94	!strncasecmp(ptr2, "</HTML", 6) \|\|
	95	!strncasecmp(ptr2, "</BODY", 6)) {
	96
	97	/* advance to beyond the angle brakcet and go again */
	98	ptr1=ptr3+1;
	99	continue;
	100	}
[75e3879]	101	if (!strncasecmp(ptr2, "<BR>", 4)) {
	102	strcat(out, "\n");
	103	ptr1=ptr3+1;
	104	continue;
	105	}
[3abf28b]	106
	107	/* if it wasn't something we know, copy to the > and go again */
[75e3879]	108	strncat(out, ptr2, ptr3-ptr2+1);
[3abf28b]	109	ptr1=ptr3+1;
	110	}
[8d24696]	111
[e3d9c77]	112	out2=owl_text_substitute(out, "<", "<");
[ddbbcffa]	113	g_free(out);
[e3d9c77]	114	out=owl_text_substitute(out2, ">", ">");
[ddbbcffa]	115	g_free(out2);
[e3d9c77]	116	out2=owl_text_substitute(out, "&", "&");
[ddbbcffa]	117	g_free(out);
[e3d9c77]	118	out=owl_text_substitute(out2, """, "\"");
[ddbbcffa]	119	g_free(out2);
[e3d9c77]	120	out2=owl_text_substitute(out, " ", " ");
[ddbbcffa]	121	g_free(out);
[e3d9c77]	122	out=owl_text_substitute(out2, "&ensp;", " ");
[ddbbcffa]	123	g_free(out2);
[e3d9c77]	124	out2=owl_text_substitute(out, "&emsp;", " ");
[ddbbcffa]	125	g_free(out);
[e3d9c77]	126	out=owl_text_substitute(out2, "&endash;", "--");
[ddbbcffa]	127	g_free(out2);
[e3d9c77]	128	out2=owl_text_substitute(out, "&emdash;", "---");
[ddbbcffa]	129	g_free(out);
[8d24696]	130
	131	return(out2);
[3abf28b]	132	}
[ab31454]	133
[3e8ff1e]	134	/* Caller must free return */
[6829afc]	135	CALLER_OWN char owl_text_expand_tabs(const char in)
[3e8ff1e]	136	{
[f7cd7c9]	137	int len = 0;
[e19eb97]	138	const char *p = in;
[3e8ff1e]	139	char ret, out;
	140	int col;
	141
[f7cd7c9]	142	col = 0;
[3e8ff1e]	143	while(*p) {
[f7cd7c9]	144	gunichar c = g_utf8_get_char(p);
[72ec874]	145	const char *q = g_utf8_next_char(p);
[f7cd7c9]	146	switch (c) {
	147	case '\t':
	148	do { len++; col++; } while (col % OWL_TAB_WIDTH);
	149	p = q;
	150	continue;
	151	case '\n':
	152	col = 0;
	153	break;
	154	default:
	155	col += mk_wcwidth(c);
	156	break;
	157	}
	158	len += q - p;
	159	p = q;
[3e8ff1e]	160	}
	161
[96828e4]	162	ret = g_new(char, len + 1);
[3e8ff1e]	163
	164	p = in;
	165	out = ret;
	166
	167	col = 0;
	168	while(*p) {
[f7cd7c9]	169	gunichar c = g_utf8_get_char(p);
[72ec874]	170	const char *q = g_utf8_next_char(p);
[f7cd7c9]	171	switch (c) {
[3e8ff1e]	172	case '\t':
	173	do {*(out++) = ' '; col++; } while (col % OWL_TAB_WIDTH);
[f7cd7c9]	174	p = q;
	175	continue;
[3e8ff1e]	176	case '\n':
[f7cd7c9]	177	col = 0;
	178	break;
[3e8ff1e]	179	default:
[f7cd7c9]	180	col += mk_wcwidth(c);
	181	break;
[3e8ff1e]	182	}
[f7cd7c9]	183	memcpy(out, p, q - p);
	184	out += q - p;
	185	p = q;
[3e8ff1e]	186	}
	187
	188	*out = 0;
	189
	190	return ret;
	191	}
	192
[ab31454]	193	/* caller must free the return */
[6829afc]	194	CALLER_OWN char owl_text_wordwrap(const char in, int col)
[ab31454]	195	{
	196	char *out;
	197	int cur, lastspace, len, lastnewline;
	198
[d4927a7]	199	out=g_strdup(in);
[ab31454]	200	len=strlen(in);
	201	cur=0;
	202	lastspace=-1;
	203	lastnewline=-1;
	204
	205	while (cur<(len-1)) {
	206	if (out[cur]==' ') {
	207	lastspace=cur;
	208	cur++;
	209	continue;
	210	} else if (out[cur]=='\n') {
	211	lastnewline=cur;
	212	cur++;
	213	continue;
	214	}
	215
	216	/* do we need to wrap? */
	217	if ( (cur-(lastnewline+1)) > col ) {
	218	if (lastspace==-1 \|\|
	219	(lastnewline>0 && (lastspace<=lastnewline))) {
	220	/* we can't help, sorry */
	221	cur++;
	222	continue;
	223	}
	224
	225	/* turn the last space into a newline */
	226	out[lastspace]='\n';
	227	lastnewline=lastspace;
	228	lastspace=-1;
	229	cur++;
	230	continue;
	231	}
	232
	233	cur++;
	234	continue;
	235	}
	236	return(out);
	237	}
[e3d9c77]	238
/* Joins wrapped lines in place: this modifies 'in'.
 *
 * A newline is replaced by a space when it is neither the first nor
 * the last character and neither neighbour is a newline.  Runs of two
 * or more newlines (paragraph breaks) and a trailing newline are kept.
 */
void owl_text_wordunwrap(char *in)
{
  size_t len = strlen(in);
  size_t i;

  /* Start at 1 and stop before the last character so both neighbours
   * always exist. */
  for (i = 1; i + 1 < len; i++) {
    if (in[i] == '\n' && in[i - 1] != '\n' && in[i + 1] != '\n') {
      in[i] = ' ';
    }
  }
}
	253
[e3d9c77]	254	/* return 1 if a string is only whitespace, otherwise 0 */
[e19eb97]	255	int only_whitespace(const char *s)
[e3d9c77]	256	{
[28ee32b]	257	if (g_utf8_validate(s,-1,NULL)) {
[e19eb97]	258	const char *p;
[28ee32b]	259	for(p = s; p[0]; p=g_utf8_next_char(p)) {
	260	if (!g_unichar_isspace(g_utf8_get_char(p))) return 0;
	261	}
	262	}
	263	else {
	264	int i;
	265	for (i=0; s[i]; i++) {
	266	if (!isspace((int) s[i])) return(0);
	267	}
[e3d9c77]	268	}
	269	return(1);
	270	}
	271
	272	/* Return a string with any occurances of 'from' replaced with 'to'.
	273	* Caller must free returned string.
	274	*/
[6829afc]	275	CALLER_OWN char owl_text_substitute(const char in, const char from, const char to)
[e3d9c77]	276	{
[42ee1be]	277	char *split = g_strsplit(in, from, 0), out;
	278	out = g_strjoinv(to, split);
	279	g_strfreev(split);
	280	return out;
[e3d9c77]	281	}
	282
	283	/* Return a string which is like 'in' except that every instance of
	284	* any character in 'toquote' found in 'in' is preceeded by the string
	285	* 'quotestr'. For example, owl_text_quote(in, "+*.", "\") would
	286	* place a backslash before every '+', '*' or '.' in 'in'. It is
	287	* permissable for a character in 'quotestr' to be in 'toquote'.
	288	* On success returns the string, on error returns NULL.
	289	*/
[6829afc]	290	CALLER_OWN char owl_text_quote(const char in, const char toquote, const char quotestr)
[e3d9c77]	291	{
[72db971]	292	int i, x, r, place, escape;
[e3d9c77]	293	int in_len, toquote_len, quotestr_len;
	294	char *out;
	295
	296	in_len=strlen(in);
	297	toquote_len=strlen(toquote);
	298	quotestr_len=strlen(quotestr);
	299	place=0;
[72db971]	300	escape = 0;
	301	for (i=0; i<in_len; i++) {
	302	if(strchr(toquote, in[i]) != NULL)
	303	escape++;
	304	}
[96828e4]	305	out = g_new(char, in_len + quotestr_len*escape+1);
[e3d9c77]	306	for (i=0; i<in_len; i++) {
	307
	308	/* check if it's a character that needs quoting */
	309	for (x=0; x<toquote_len; x++) {
	310	if (in[i]==toquote[x]) {
	311	/* quote it */
	312	for (r=0; r<quotestr_len; r++) {
	313	out[place+r]=quotestr[r];
	314	}
	315	place+=quotestr_len;
	316	break;
	317	}
	318	}
	319
	320	/* either way, we now copy over the character */
	321	out[place]=in[i];
	322	place++;
	323	}
	324	out[place]='\0';
	325	return(out);
	326	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: