[systemd.git] / src / basic / escape.c

/***
  This file is part of systemd.

  Copyright 2010 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include <errno.h>
#include <stdlib.h>
#include <string.h>

#include "alloc-util.h"
#include "escape.h"
#include "hexdecoct.h"
#include "macro.h"
#include "utf8.h"

size_t cescape_char(char c, char *buf) {
        char *out = buf;
        char mnemonic = 0;

        /* Writes the C-style escaped form of c to buf and returns the
         * number of bytes written (1, 2 or 4). No trailing NUL is
         * written, hence buf needs room for up to four bytes. */

        /* First, see whether c has a two-character "\<letter>" form */
        switch (c) {

        case '\a':
                mnemonic = 'a';
                break;
        case '\b':
                mnemonic = 'b';
                break;
        case '\f':
                mnemonic = 'f';
                break;
        case '\n':
                mnemonic = 'n';
                break;
        case '\r':
                mnemonic = 'r';
                break;
        case '\t':
                mnemonic = 't';
                break;
        case '\v':
                mnemonic = 'v';
                break;
        case '\\':
        case '"':
        case '\'':
                /* These are escaped by simply prefixing a backslash */
                mnemonic = c;
                break;

        default:
                break;
        }

        if (mnemonic != 0) {
                *(out++) = '\\';
                *(out++) = mnemonic;
        } else if ((c < ' ') || (c >= 127)) {
                /* Everything else outside of the printable range is
                 * written as a three digit octal escape. Octal is
                 * preferred over hexadecimal because that is what
                 * glib's g_strescape() generates, too. */
                unsigned char u = (unsigned char) c;

                *(out++) = '\\';
                *(out++) = octchar(u >> 6);
                *(out++) = octchar(u >> 3);
                *(out++) = octchar(u);
        } else
                /* Plain printable character, pass through as-is */
                *(out++) = c;

        return out - buf;
}

char *cescape_length(const char *s, size_t n) {
        char *r, *t;
        size_t i;

        assert(s || n == 0);

        /* Does C style string escaping of the first n bytes of s and
         * returns the result as a newly allocated, NUL-terminated
         * string. May be reversed with cunescape(). Returns NULL if
         * the output buffer cannot be allocated. */

        /* In the worst case each input byte turns into a four byte
         * escape sequence, plus one byte for the trailing NUL. Refuse
         * lengths for which that size computation would wrap around,
         * as we'd otherwise allocate a buffer that is too small. */
        if (n > ((size_t) -1 - 1) / 4)
                return NULL;

        r = new(char, n*4 + 1);
        if (!r)
                return NULL;

        /* Iterate by index rather than by pointer, so that no pointer
         * arithmetic is done on s when it is NULL (which the assert
         * above permits for n == 0). */
        for (i = 0, t = r; i < n; i++)
                t += cescape_char(s[i], t);

        *t = 0;

        return r;
}

char *cescape(const char *s) {
        size_t len;

        /* Same as cescape_length(), but for a NUL-terminated string:
         * the whole string up to (not including) the NUL is escaped. */

        assert(s);

        len = strlen(s);
        return cescape_length(s, len);
}

int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit) {
        int r = 1;

        assert(p);
        assert(ret);

        /* Unescapes a single C style escape sequence. p points to the
         * character right after the backslash. length is the number of
         * bytes available at p; (size_t) -1 disables all length checks
         * (presumably for NUL-terminated input, where parsing stops at
         * the first character that is not a valid digit).
         *
         * On success the unescaped character is stored in *ret and the
         * number of input bytes consumed (not counting the backslash)
         * is returned: 1 for the single letter escapes, 3 for \xHH and
         * \OOO, 5 for \uHHHH and 9 for \UHHHHHHHH. On failure -EINVAL
         * is returned.
         *
         * *eight_bit is set to true for \xHH and \OOO, i.e. when the
         * value denotes a literal byte that should be copied directly
         * rather than be encoded as UTF-8. It is never set to false
         * here, the caller has to initialize it. eight_bit may be
         * NULL if the caller is not interested in this information. */

        /* Check the length before we look at *p, so that we never read
         * beyond what the caller told us is available. */
        if (length != (size_t) -1 && length < 1)
                return -EINVAL;

        assert(*p);

        switch (p[0]) {

        case 'a':
                *ret = '\a';
                break;
        case 'b':
                *ret = '\b';
                break;
        case 'f':
                *ret = '\f';
                break;
        case 'n':
                *ret = '\n';
                break;
        case 'r':
                *ret = '\r';
                break;
        case 't':
                *ret = '\t';
                break;
        case 'v':
                *ret = '\v';
                break;
        case '\\':
                *ret = '\\';
                break;
        case '"':
                *ret = '"';
                break;
        case '\'':
                *ret = '\'';
                break;

        case 's':
                /* This is an extension of the XDG syntax files */
                *ret = ' ';
                break;

        case 'x': {
                /* hexadecimal encoding */
                int a, b;

                if (length != (size_t) -1 && length < 3)
                        return -EINVAL;

                a = unhexchar(p[1]);
                if (a < 0)
                        return -EINVAL;

                b = unhexchar(p[2]);
                if (b < 0)
                        return -EINVAL;

                /* Don't allow NUL bytes */
                if (a == 0 && b == 0)
                        return -EINVAL;

                *ret = (a << 4U) | b;
                if (eight_bit)
                        *eight_bit = true;
                r = 3;
                break;
        }

        case 'u': {
                /* C++11 style 16bit unicode */

                int a[4];
                unsigned i;
                uint32_t c;

                if (length != (size_t) -1 && length < 5)
                        return -EINVAL;

                for (i = 0; i < 4; i++) {
                        a[i] = unhexchar(p[1 + i]);
                        if (a[i] < 0)
                                /* Report the same error as all other
                                 * parse failures in this function */
                                return -EINVAL;
                }

                c = ((uint32_t) a[0] << 12U) | ((uint32_t) a[1] << 8U) | ((uint32_t) a[2] << 4U) | (uint32_t) a[3];

                /* Don't allow 0 chars */
                if (c == 0)
                        return -EINVAL;

                /* NOTE(review): unlike the \U case below the value is
                 * not passed through unichar_is_valid() here. Confirm
                 * whether accepting every non-zero 16bit value is
                 * intended. */

                *ret = c;
                r = 5;
                break;
        }

        case 'U': {
                /* C++11 style 32bit unicode */

                int a[8];
                unsigned i;
                char32_t c;

                if (length != (size_t) -1 && length < 9)
                        return -EINVAL;

                for (i = 0; i < 8; i++) {
                        a[i] = unhexchar(p[1 + i]);
                        if (a[i] < 0)
                                /* Report the same error as all other
                                 * parse failures in this function */
                                return -EINVAL;
                }

                c = ((uint32_t) a[0] << 28U) | ((uint32_t) a[1] << 24U) | ((uint32_t) a[2] << 20U) | ((uint32_t) a[3] << 16U) |
                    ((uint32_t) a[4] << 12U) | ((uint32_t) a[5] <<  8U) | ((uint32_t) a[6] <<  4U) |  (uint32_t) a[7];

                /* Don't allow 0 chars */
                if (c == 0)
                        return -EINVAL;

                /* Don't allow invalid code points */
                if (!unichar_is_valid(c))
                        return -EINVAL;

                *ret = c;
                r = 9;
                break;
        }

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7': {
                /* octal encoding, always exactly three digits */
                int a, b, c;
                char32_t m;

                if (length != (size_t) -1 && length < 3)
                        return -EINVAL;

                a = unoctchar(p[0]);
                if (a < 0)
                        return -EINVAL;

                b = unoctchar(p[1]);
                if (b < 0)
                        return -EINVAL;

                c = unoctchar(p[2]);
                if (c < 0)
                        return -EINVAL;

                /* don't allow NUL bytes */
                if (a == 0 && b == 0 && c == 0)
                        return -EINVAL;

                /* Don't allow bytes above 255 */
                m = ((uint32_t) a << 6U) | ((uint32_t) b << 3U) | (uint32_t) c;
                if (m > 255)
                        return -EINVAL;

                *ret = m;
                if (eight_bit)
                        *eight_bit = true;
                r = 3;
                break;
        }

        default:
                /* Unknown escape character */
                return -EINVAL;
        }

        return r;
}

int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret) {
        const char *in, *end;
        char *buf, *out;
        size_t prefix_len;

        assert(s);
        assert(ret);

        /* Reverses C style escaping of the first 'length' bytes of s.
         * If prefix is non-NULL it is copied verbatim in front of the
         * result. On success the newly allocated, NUL-terminated
         * string is stored in *ret and its length (including the
         * prefix, excluding the NUL) is returned. On failure a
         * negative errno-style value is returned and *ret is left
         * untouched. */

        prefix_len = prefix ? strlen(prefix) : 0;
        end = s + length;

        /* The buffer is sized for the prefix plus one output byte per
         * input byte plus the NUL, on the assumption that unescaping
         * never lengthens the data. */
        buf = new(char, prefix_len+length+1);
        if (!buf)
                return -ENOMEM;

        if (prefix)
                memcpy(buf, prefix, prefix_len);

        in = s;
        out = buf + prefix_len;

        while (in < end) {
                size_t left = end - in;
                bool eight_bit = false;
                char32_t u;
                int k;

                assert(left > 0);

                if (*in == '\\') {

                        if (left > 1) {
                                k = cunescape_one(in + 1, left - 1, &u, &eight_bit);
                                if (k >= 0) {
                                        if (eight_bit)
                                                /* A literal byte, store it as it is */
                                                *(out++) = u;
                                        else
                                                /* A unicode character, store it as UTF-8 */
                                                out += utf8_encode_unichar(out, u);

                                        /* Skip the backslash and everything the escape sequence covered */
                                        in += k + 1;
                                        continue;
                                }
                        } else
                                /* A backslash at the very end, with nothing following */
                                k = -EINVAL;

                        if (!(flags & UNESCAPE_RELAX)) {
                                free(buf);
                                return k;
                        }

                        /* In relaxed mode a trailing or otherwise
                         * invalid backslash is taken literally, and
                         * whatever follows is handled by the next
                         * iterations */
                }

                /* Copy one byte verbatim */
                *(out++) = *(in++);
        }

        *out = 0;

        *ret = buf;
        return out - buf;
}

int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **ret) {
        /* Same as cunescape_length_with_prefix(), but without
         * prepending any prefix to the result. */
        const char *no_prefix = NULL;

        return cunescape_length_with_prefix(s, length, no_prefix, flags, ret);
}

int cunescape(const char *s, UnescapeFlags flags, char **ret) {
        /* Same as cunescape_length(), but for a NUL-terminated string:
         * everything up to (not including) the NUL is unescaped. */
        size_t len = strlen(s);

        return cunescape_length(s, len, flags, ret);
}

char *xescape(const char *s, const char *bad) {
        char *r, *t;
        const char *f;

        assert(s);
        assert(bad);

        /* Escapes all chars in bad, in addition to \ and all special
         * chars (i.e. everything below ' ' or from 127 on), in \xFF
         * style escaping. May be reversed with cunescape(). Returns a
         * newly allocated, NUL-terminated string, or NULL if the
         * allocation fails. */

        /* Each input byte expands to at most four output bytes */
        r = new(char, strlen(s) * 4 + 1);
        if (!r)
                return NULL;

        for (f = s, t = r; *f; f++) {

                if ((*f < ' ') || (*f >= 127) ||
                    (*f == '\\') || strchr(bad, *f)) {
                        /* Extract the nibbles from the unsigned byte
                         * value, so that we never right-shift a
                         * negative number if char is signed. This
                         * matches what cescape_char() does. */
                        unsigned char u = (unsigned char) *f;

                        *(t++) = '\\';
                        *(t++) = 'x';
                        *(t++) = hexchar(u >> 4);
                        *(t++) = hexchar(u & 15);
                } else
                        *(t++) = *f;
        }

        *t = 0;

        return r;
}

static char *strcpy_backslash_escaped(char *t, const char *s, const char *bad) {
        const char *in;
        char *out = t;

        /* Copies s to t, inserting a backslash in front of every
         * backslash and every character that is listed in bad. No
         * trailing NUL is written; the position right after the last
         * byte written is returned, so that the caller can continue
         * appending there. */

        assert(bad);

        for (in = s; *in != 0; in++) {
                const char ch = *in;

                if (ch == '\\' || strchr(bad, ch) != NULL)
                        *(out++) = '\\';

                *(out++) = ch;
        }

        return out;
}

char *shell_escape(const char *s, const char *bad) {
        char *buf, *end;

        /* Returns a newly allocated copy of s in which every backslash
         * and every character listed in bad is prefixed with a
         * backslash. Returns NULL if the allocation fails. */

        /* Worst case every character needs a backslash in front of
         * it, i.e. two output bytes per input byte, plus the NUL */
        buf = new(char, strlen(s)*2+1);
        if (!buf)
                return NULL;

        end = strcpy_backslash_escaped(buf, s, bad);
        *end = 0;

        return buf;
}

char *shell_maybe_quote(const char *s) {
        const char *special;
        char *quoted, *w;
        size_t plain_len;

        assert(s);

        /* Puts the string in double quotes if that is necessary to
         * make it OK as shell string; otherwise returns a plain copy.
         * Either way the result is newly allocated, and NULL is
         * returned if the allocation fails. */

        /* Skip ahead as long as we see characters that are fine
         * without quoting: anything above ' ' and below 127 that is
         * not listed in SHELL_NEED_QUOTES. */
        special = s;
        while (*special &&
               *special > ' ' &&
               *special < 127 &&
               !strchr(SHELL_NEED_QUOTES, *special))
                special++;

        /* Reached the end without finding anything suspicious? */
        if (*special == 0)
                return strdup(s);

        /* Opening quote, up to two bytes per character, closing
         * quote, NUL */
        quoted = new(char, 1+strlen(s)*2+1+1);
        if (!quoted)
                return NULL;

        plain_len = special - s;

        w = quoted;
        *(w++) = '"';

        /* The part we already checked can be copied without looking
         * at it again */
        w = mempcpy(w, s, plain_len);

        /* For the rest, backslash-escape what needs to be escaped */
        w = strcpy_backslash_escaped(w, special, SHELL_NEED_ESCAPE);

        *(w++) = '"';
        *w = 0;

        return quoted;
}
Commit	Line	Data
db2df898 MP	1	/***
	2	This file is part of systemd.
	3
	4	Copyright 2010 Lennart Poettering
	5
	6	systemd is free software; you can redistribute it and/or modify it
	7	under the terms of the GNU Lesser General Public License as published by
	8	the Free Software Foundation; either version 2.1 of the License, or
	9	(at your option) any later version.
	10
	11	systemd is distributed in the hope that it will be useful, but
	12	WITHOUT ANY WARRANTY; without even the implied warranty of
	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	14	Lesser General Public License for more details.
	15
	16	You should have received a copy of the GNU Lesser General Public License
	17	along with systemd; If not, see <http://www.gnu.org/licenses/>.
	18	***/
	19
4c89c718 MP	20	#include <errno.h>
	21	#include <stdlib.h>
	22	#include <string.h>
	23
db2df898 MP	24	#include "alloc-util.h"
	25	#include "escape.h"
	26	#include "hexdecoct.h"
4c89c718	27	#include "macro.h"
db2df898	28	#include "utf8.h"
db2df898 MP	29
	30	size_t cescape_char(char c, char *buf) {
	31	char * buf_old = buf;
	32
	33	switch (c) {
	34
	35	case '\a':
	36	*(buf++) = '\\';
	37	*(buf++) = 'a';
	38	break;
	39	case '\b':
	40	*(buf++) = '\\';
	41	*(buf++) = 'b';
	42	break;
	43	case '\f':
	44	*(buf++) = '\\';
	45	*(buf++) = 'f';
	46	break;
	47	case '\n':
	48	*(buf++) = '\\';
	49	*(buf++) = 'n';
	50	break;
	51	case '\r':
	52	*(buf++) = '\\';
	53	*(buf++) = 'r';
	54	break;
	55	case '\t':
	56	*(buf++) = '\\';
	57	*(buf++) = 't';
	58	break;
	59	case '\v':
	60	*(buf++) = '\\';
	61	*(buf++) = 'v';
	62	break;
	63	case '\\':
	64	*(buf++) = '\\';
	65	*(buf++) = '\\';
	66	break;
	67	case '"':
	68	*(buf++) = '\\';
	69	*(buf++) = '"';
	70	break;
	71	case '\'':
	72	*(buf++) = '\\';
	73	*(buf++) = '\'';
	74	break;
	75
	76	default:
	77	/* For special chars we prefer octal over
	78	* hexadecimal encoding, simply because glib's
	79	* g_strescape() does the same */
	80	if ((c < ' ') \|\| (c >= 127)) {
	81	*(buf++) = '\\';
	82	*(buf++) = octchar((unsigned char) c >> 6);
	83	*(buf++) = octchar((unsigned char) c >> 3);
	84	*(buf++) = octchar((unsigned char) c);
	85	} else
	86	*(buf++) = c;
	87	break;
	88	}
	89
	90	return buf - buf_old;
	91	}
	92
4c89c718	93	char cescape_length(const char s, size_t n) {
db2df898	94	const char *f;
4c89c718	95	char r, t;
db2df898	96
4c89c718	97	assert(s \|\| n == 0);
db2df898 MP	98
	99	/* Does C style string escaping. May be reversed with
	100	* cunescape(). */
	101
4c89c718	102	r = new(char, n*4 + 1);
db2df898 MP	103	if (!r)
	104	return NULL;
	105
4c89c718	106	for (f = s, t = r; f < s + n; f++)
db2df898 MP	107	t += cescape_char(*f, t);
	108
	109	*t = 0;
	110
	111	return r;
	112	}
	113
4c89c718 MP	114	char cescape(const char s) {
	115	assert(s);
	116
	117	return cescape_length(s, strlen(s));
	118	}
	119
	120	int cunescape_one(const char p, size_t length, char32_t ret, bool *eight_bit) {
db2df898 MP	121	int r = 1;
	122
	123	assert(p);
	124	assert(*p);
	125	assert(ret);
	126
4c89c718 MP	127	/* Unescapes C style. Returns the unescaped character in ret.
	128	* Sets *eight_bit to true if the escaped sequence either fits in
	129	* one byte in UTF-8 or is a non-unicode literal byte and should
	130	* instead be copied directly.
	131	*/
db2df898 MP	132
	133	if (length != (size_t) -1 && length < 1)
	134	return -EINVAL;
	135
	136	switch (p[0]) {
	137
	138	case 'a':
	139	*ret = '\a';
	140	break;
	141	case 'b':
	142	*ret = '\b';
	143	break;
	144	case 'f':
	145	*ret = '\f';
	146	break;
	147	case 'n':
	148	*ret = '\n';
	149	break;
	150	case 'r':
	151	*ret = '\r';
	152	break;
	153	case 't':
	154	*ret = '\t';
	155	break;
	156	case 'v':
	157	*ret = '\v';
	158	break;
	159	case '\\':
	160	*ret = '\\';
	161	break;
	162	case '"':
	163	*ret = '"';
	164	break;
	165	case '\'':
	166	*ret = '\'';
	167	break;
	168
	169	case 's':
	170	/* This is an extension of the XDG syntax files */
	171	*ret = ' ';
	172	break;
	173
	174	case 'x': {
	175	/* hexadecimal encoding */
	176	int a, b;
	177
	178	if (length != (size_t) -1 && length < 3)
	179	return -EINVAL;
	180
	181	a = unhexchar(p[1]);
	182	if (a < 0)
	183	return -EINVAL;
	184
	185	b = unhexchar(p[2]);
	186	if (b < 0)
	187	return -EINVAL;
	188
	189	/* Don't allow NUL bytes */
	190	if (a == 0 && b == 0)
	191	return -EINVAL;
	192
4c89c718 MP	193	*ret = (a << 4U) \| b;
4c89c718 MP	194	*eight_bit = true;
db2df898 MP	195	r = 3;
	196	break;
	197	}
	198
	199	case 'u': {
	200	/* C++11 style 16bit unicode */
	201
	202	int a[4];
	203	unsigned i;
	204	uint32_t c;
	205
	206	if (length != (size_t) -1 && length < 5)
	207	return -EINVAL;
	208
	209	for (i = 0; i < 4; i++) {
	210	a[i] = unhexchar(p[1 + i]);
	211	if (a[i] < 0)
	212	return a[i];
	213	}
	214
	215	c = ((uint32_t) a[0] << 12U) \| ((uint32_t) a[1] << 8U) \| ((uint32_t) a[2] << 4U) \| (uint32_t) a[3];
	216
	217	/* Don't allow 0 chars */
	218	if (c == 0)
	219	return -EINVAL;
	220
4c89c718	221	*ret = c;
db2df898 MP	222	r = 5;
	223	break;
	224	}
	225
	226	case 'U': {
	227	/* C++11 style 32bit unicode */
	228
	229	int a[8];
	230	unsigned i;
4c89c718	231	char32_t c;
db2df898 MP	232
	233	if (length != (size_t) -1 && length < 9)
	234	return -EINVAL;
	235
	236	for (i = 0; i < 8; i++) {
	237	a[i] = unhexchar(p[1 + i]);
	238	if (a[i] < 0)
	239	return a[i];
	240	}
	241
	242	c = ((uint32_t) a[0] << 28U) \| ((uint32_t) a[1] << 24U) \| ((uint32_t) a[2] << 20U) \| ((uint32_t) a[3] << 16U) \|
	243	((uint32_t) a[4] << 12U) \| ((uint32_t) a[5] << 8U) \| ((uint32_t) a[6] << 4U) \| (uint32_t) a[7];
	244
	245	/* Don't allow 0 chars */
	246	if (c == 0)
	247	return -EINVAL;
	248
	249	/* Don't allow invalid code points */
	250	if (!unichar_is_valid(c))
	251	return -EINVAL;
	252
4c89c718	253	*ret = c;
db2df898 MP	254	r = 9;
	255	break;
	256	}
	257
	258	case '0':
	259	case '1':
	260	case '2':
	261	case '3':
	262	case '4':
	263	case '5':
	264	case '6':
	265	case '7': {
	266	/* octal encoding */
	267	int a, b, c;
4c89c718	268	char32_t m;
db2df898 MP	269
	270	if (length != (size_t) -1 && length < 3)
	271	return -EINVAL;
	272
	273	a = unoctchar(p[0]);
	274	if (a < 0)
	275	return -EINVAL;
	276
	277	b = unoctchar(p[1]);
	278	if (b < 0)
	279	return -EINVAL;
	280
	281	c = unoctchar(p[2]);
	282	if (c < 0)
	283	return -EINVAL;
	284
	285	/* don't allow NUL bytes */
	286	if (a == 0 && b == 0 && c == 0)
	287	return -EINVAL;
	288
	289	/* Don't allow bytes above 255 */
	290	m = ((uint32_t) a << 6U) \| ((uint32_t) b << 3U) \| (uint32_t) c;
	291	if (m > 255)
	292	return -EINVAL;
	293
	294	*ret = m;
4c89c718	295	*eight_bit = true;
db2df898 MP	296	r = 3;
	297	break;
	298	}
	299
	300	default:
	301	return -EINVAL;
	302	}
	303
	304	return r;
	305	}
	306
	307	int cunescape_length_with_prefix(const char s, size_t length, const char prefix, UnescapeFlags flags, char **ret) {
	308	char r, t;
	309	const char *f;
	310	size_t pl;
	311
	312	assert(s);
	313	assert(ret);
	314
	315	/* Undoes C style string escaping, and optionally prefixes it. */
	316
	317	pl = prefix ? strlen(prefix) : 0;
	318
	319	r = new(char, pl+length+1);
	320	if (!r)
	321	return -ENOMEM;
	322
	323	if (prefix)
	324	memcpy(r, prefix, pl);
	325
	326	for (f = s, t = r + pl; f < s + length; f++) {
	327	size_t remaining;
4c89c718 MP	328	bool eight_bit = false;
4c89c718 MP	329	char32_t u;
db2df898 MP	330	int k;
	331
	332	remaining = s + length - f;
	333	assert(remaining > 0);
	334
	335	if (*f != '\\') {
	336	/* A literal literal, copy verbatim */
	337	(t++) = f;
	338	continue;
	339	}
	340
	341	if (remaining == 1) {
	342	if (flags & UNESCAPE_RELAX) {
	343	/* A trailing backslash, copy verbatim */
	344	(t++) = f;
	345	continue;
	346	}
	347
	348	free(r);
	349	return -EINVAL;
	350	}
	351
4c89c718	352	k = cunescape_one(f + 1, remaining - 1, &u, &eight_bit);
db2df898 MP	353	if (k < 0) {
	354	if (flags & UNESCAPE_RELAX) {
	355	/* Invalid escape code, let's take it literal then */
	356	*(t++) = '\\';
	357	continue;
	358	}
	359
	360	free(r);
	361	return k;
	362	}
	363
4c89c718 MP	364	f += k;
	365	if (eight_bit)
	366	/* One byte? Set directly as specified */
	367	*(t++) = u;
db2df898	368	else
4c89c718	369	/* Otherwise encode as multi-byte UTF-8 */
db2df898	370	t += utf8_encode_unichar(t, u);
db2df898 MP	371	}
	372
	373	*t = 0;
	374
	375	*ret = r;
	376	return t - r;
	377	}
	378
	379	int cunescape_length(const char s, size_t length, UnescapeFlags flags, char *ret) {
	380	return cunescape_length_with_prefix(s, length, NULL, flags, ret);
	381	}
	382
	383	int cunescape(const char s, UnescapeFlags flags, char *ret) {
	384	return cunescape_length(s, strlen(s), flags, ret);
	385	}
	386
	387	char xescape(const char s, const char *bad) {
	388	char r, t;
	389	const char *f;
	390
	391	/* Escapes all chars in bad, in addition to \ and all special
	392	* chars, in \xFF style escaping. May be reversed with
	393	* cunescape(). */
	394
	395	r = new(char, strlen(s) * 4 + 1);
	396	if (!r)
	397	return NULL;
	398
	399	for (f = s, t = r; *f; f++) {
	400
	401	if ((f < ' ') \|\| (f >= 127) \|\|
	402	(f == '\\') \|\| strchr(bad, f)) {
	403	*(t++) = '\\';
	404	*(t++) = 'x';
	405	(t++) = hexchar(f >> 4);
	406	(t++) = hexchar(f);
	407	} else
	408	(t++) = f;
	409	}
	410
	411	*t = 0;
	412
	413	return r;
	414	}
	415
	416	static char strcpy_backslash_escaped(char t, const char s, const char bad) {
	417	assert(bad);
	418
	419	for (; *s; s++) {
	420	if (s == '\\' \|\| strchr(bad, s))
	421	*(t++) = '\\';
	422
	423	(t++) = s;
	424	}
	425
	426	return t;
	427	}
	428
	429	char shell_escape(const char s, const char *bad) {
	430	char r, t;
	431
	432	r = new(char, strlen(s)*2+1);
	433	if (!r)
	434	return NULL;
435
436	t = strcpy_backslash_escaped(r, s, bad);
437	*t = 0;
438
439	return r;
440	}
441
442	char shell_maybe_quote(const char s) {
443	const char *p;
444	char r, t;
445
446	assert(s);
447
448	/* Encloses a string in double quotes if necessary to make it
449	* OK as shell string. */
450
451	for (p = s; *p; p++)
452	if (*p <= ' ' \|\|
453	*p >= 127 \|\|
454	strchr(SHELL_NEED_QUOTES, *p))
455	break;
456
457	if (!*p)
458	return strdup(s);
459
460	r = new(char, 1+strlen(s)*2+1+1);
461	if (!r)
462	return NULL;
463
464	t = r;
465	*(t++) = '"';
466	t = mempcpy(t, s, p - s);
467
468	t = strcpy_backslash_escaped(t, p, SHELL_NEED_ESCAPE);
469
470	*(t++)= '"';
471	*t = 0;
472
473	return r;
474	}