/* yaft/parse.h */

/* See LICENSE for licence details. */
/*
 * Dispatch table for control characters, indexed by the character code.
 * control_character() looks up ctrl_func[ch] and calls the entry when it is
 * non-NULL, always passing NULL as arg.  Codes without an initializer below
 * are left NULL and are therefore ignored.
 */
void (*ctrl_func[CTRL_CHARS])(struct terminal * term, void *arg) = {
	[BS]  = bs,
	[HT]  = tab,
	[LF]  = nl,
	[VT]  = nl,        /* VT and FF share the LF handler */
	[FF]  = nl,
	[CR]  = cr,
	[ESC] = enter_esc, /* presumably switches the parser into STATE_ESC -- defined elsewhere */
};

/*
 * Dispatch table for "ESC <char>" sequences, indexed by the character that
 * follows ESC.  esc_sequence() consults it only when the buffered sequence is
 * exactly one character long and calls the entry with arg == NULL.
 * The trailing comments give the conventional VT/xterm name of each
 * sequence; what a handler actually does is defined elsewhere.
 */
void (*esc_func[ESC_CHARS])(struct terminal * term, void *arg) = {
	['7'] = save_state,    /* DECSC */
	['8'] = restore_state, /* DECRC */
	['D'] = nl,            /* IND: same handler as LF */
	['E'] = crnl,          /* NEL */
	['H'] = set_tabstop,   /* HTS */
	['M'] = reverse_nl,    /* RI */
	['P'] = enter_dcs,     /* DCS introducer */
	['Z'] = identify,      /* DECID: same handler as CSI c */
	['['] = enter_csi,     /* CSI introducer */
	[']'] = enter_osc,     /* OSC introducer */
	['c'] = ris,           /* RIS */
};

/*
 * Dispatch table for CSI sequences, indexed by the final character.
 * csi_sequence() calls the matching entry with arg pointing at the parsed
 * parameters (struct parm_t); final characters without an entry are ignored.
 * The trailing comments give the ECMA-48/xterm mnemonic for each final
 * character; what a handler actually does is defined elsewhere.
 */
void (*csi_func[ESC_CHARS])(struct terminal * term, void *arg) = {
	['@'] = insert_blank,  /* ICH */
	['A'] = curs_up,       /* CUU */
	['B'] = curs_down,     /* CUD */
	['C'] = curs_forward,  /* CUF */
	['D'] = curs_back,     /* CUB */
	['E'] = curs_nl,       /* CNL */
	['F'] = curs_pl,       /* CPL */
	['G'] = curs_col,      /* CHA */
	['H'] = curs_pos,      /* CUP */
	['J'] = erase_display, /* ED */
	['K'] = erase_line,    /* EL */
	['L'] = insert_line,   /* IL */
	['M'] = delete_line,   /* DL */
	['P'] = delete_char,   /* DCH */
	['X'] = erase_char,    /* ECH */
	['a'] = curs_forward,  /* HPR: same handler as CUF */
	['c'] = identify,      /* DA */
	['d'] = curs_line,     /* VPA */
	['e'] = curs_down,     /* VPR: same handler as CUD */
	['f'] = curs_pos,      /* HVP: same handler as CUP */
	['g'] = clear_tabstop, /* TBC */
	['h'] = set_mode,      /* SM */
	['l'] = reset_mode,    /* RM */
	['m'] = set_attr,      /* SGR */
	['n'] = status_report, /* DSR */
	['r'] = set_margin,    /* DECSTBM */
	['s'] = save_state,    /* same handler as ESC 7 */
	['u'] = restore_state, /* same handler as ESC 8 */
	['`'] = curs_col,      /* HPA: same handler as CHA */
};

/*
 * Dispatch table for DCS sequences, indexed by the character passed to
 * dcs_sequence(), which calls the matching entry with arg pointing at the
 * parsed parameters (struct parm_t).  Only '{' is handled.
 */
void (*dcs_func[ESC_CHARS])(struct terminal * term, void *arg) = {
	['{'] = decdld_header, /* '{' ends the parameter part of a DECDLD header */
};

/*
 * Handle one control character.
 *
 * Logs the character's mnemonic when DEBUG is set, then runs its handler from
 * ctrl_func[] (with arg == NULL) if one is registered.  Characters without a
 * handler are silently dropped.
 *
 * term: terminal the handler operates on
 * ch:   control character; it indexes a 32-entry name table, and the caller
 *       in parse() only passes values up to 0x1F
 */
void control_character(struct terminal *term, uint8_t ch)
{
	/* mnemonics for 0x00 - 0x1F, padded to three columns for the debug log */
	static const char *mnemonic[] = {
		"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
		"BS ", "HT ", "LF ", "VT ", "FF ", "CR ", "SO ", "SI ",
		"DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
		"CAN", "EM ", "SUB", "ESC", "FS ", "GS ", "RS ", "US ",
	};
	void (*handler)(struct terminal *, void *);

	if (DEBUG)
		fprintf(stderr, "ctl: %s\n", mnemonic[ch]);

	handler = ctrl_func[ch];
	if (handler == NULL)
		return; /* nothing registered for this character */

	handler(term, NULL);
}

/*
 * Handle a complete "ESC ..." sequence held in term->esc.buf.
 *
 * A handler from esc_func[] is run (with arg == NULL) only when the buffered
 * sequence is exactly one character long and an entry exists for ch.
 *
 * term: terminal whose escape buffer holds the sequence
 * ch:   character handed over by the caller together with the sequence
 */
void esc_sequence(struct terminal *term, uint8_t ch)
{
	void (*handler)(struct terminal *, void *);

	if (DEBUG)
		fprintf(stderr, "esc: ESC %s\n", term->esc.buf);

	if (strlen(term->esc.buf) == 1) {
		handler = esc_func[ch];
		if (handler != NULL)
			handler(term, NULL);
	}

	/*
	 * '[' and ']' introduce CSI and OSC sequences, whose handlers still read
	 * the buffer from its first byte, so the buffer is kept for them.  Every
	 * other sequence is finished here.
	 * NOTE(review): 'P' (enter_dcs) is not exempted, so reset_esc() runs
	 * right after enter_dcs() -- confirm this does not undo what it set up.
	 */
	switch (ch) {
	case '[':
	case ']':
		break;
	default:
		reset_esc(term);
		break;
	}
}

/*
 * Handle a complete CSI sequence held in term->esc.buf.
 *
 * The bytes after the leading '[' are split on ';' into digit-only
 * parameters, the final character is cut off the buffer, and the handler
 * registered in csi_func[] for ch (if any) is called with the parameters.
 * The escape buffer is reset afterwards in every case.
 *
 * term: terminal whose escape buffer holds the sequence
 * ch:   final character of the sequence, used as the dispatch key
 */
void csi_sequence(struct terminal *term, uint8_t ch)
{
	struct parm_t parm;
	void (*handler)(struct terminal *, void *);

	if (DEBUG)
		fprintf(stderr, "csi: CSI %s\n", term->esc.buf);

	reset_parm(&parm);
	parse_arg(&term->esc.buf[1], &parm, ';', isdigit); /* buf[0] is '[' */

	/* overwrite the final character so the buffer ends after the parameters */
	term->esc.bp[-1] = '\0';

	handler = csi_func[ch];
	if (handler != NULL)
		handler(term, &parm);

	reset_esc(term);
}

/*
 * Handle a complete OSC sequence held in term->esc.buf.
 *
 * The bytes after the leading ']' are split on ';' by parse_arg().  The first
 * argument is read as a decimal number that selects the operation:
 *      4: set_palette()
 *    104: reset_palette()
 *   8900: glyph_width_report()
 * Any other value is ignored.  The escape buffer is reset in every case.
 *
 * term: terminal whose escape buffer holds the sequence
 * ch:   final character handed over by the caller (not used here)
 */
void osc_sequence(struct terminal *term, uint8_t ch)
{
	int i;
	long osc_mode;
	struct parm_t parm;

	if (DEBUG)
		fprintf(stderr, "osc: OSC %s\n", term->esc.buf);

	reset_parm(&parm);
	parse_arg(term->esc.buf + 1, &parm, ';', is_osc_parm); /* skip ']' */
	if (*(term->esc.bp - 1) == BACKSLASH) /* ST: ESC BACKSLASH */
		*(term->esc.bp - 2) = '\0';       /* also drop the ESC before it */
	*(term->esc.bp - 1) = '\0'; /* omit final character */

	if (DEBUG)
		for (i = 0; i < parm.argc; i++)
			fprintf(stderr, "\targv[%d]: %s\n", i, parm.argv[i]);

	/*
	 * parse_arg() is not visible from here, so an empty first argument is
	 * guarded against defensively instead of being handed to strtol().
	 */
	if (parm.argc > 0 && parm.argv[0] != NULL) {
		/*
		 * The number comes straight from the input stream.  atoi() is
		 * undefined for values that do not fit in an int (and typically
		 * truncates, so a huge number could alias mode 4).  strtol()
		 * saturates instead, and the result is compared as a long.
		 */
		osc_mode = strtol(parm.argv[0], NULL, 10);
		if (DEBUG)
			fprintf(stderr, "osc_mode:%ld\n", osc_mode);

		if (osc_mode == 4)
			set_palette(term, &parm);
		else if (osc_mode == 104)
			reset_palette(term, &parm);
		else if (osc_mode == 8900)
			glyph_width_report(term, &parm);
	}

	reset_esc(term);
}

/*
 * Handle a DCS sequence held in term->esc.buf.
 *
 * The buffer is split on ';' into digit-only parameters, the final character
 * is cut off the buffer, and the handler registered in dcs_func[] for ch (if
 * any) is called with the parameters.
 *
 * term: terminal whose escape buffer holds the sequence
 * ch:   final character of the sequence, used as the dispatch key
 */
void dcs_sequence(struct terminal *term, uint8_t ch)
{
	struct parm_t parm;
	void (*handler)(struct terminal *, void *);

	if (DEBUG)
		fprintf(stderr, "dcs: DCS %s\n", term->esc.buf);

	reset_parm(&parm);
	/* unlike the CSI/OSC handlers, parsing starts at the first buffered byte */
	parse_arg(term->esc.buf, &parm, ';', isdigit);

	/* overwrite the final character so the buffer ends after the parameters */
	term->esc.bp[-1] = '\0';

	handler = dcs_func[ch];
	if (handler != NULL)
		handler(term, &parm);

	/* after '{' the escape state is left untouched; anything else ends the sequence */
	if (ch == '{')
		return;

	reset_esc(term);
}

/*
 * Feed one byte of a UTF-8 stream into the decoder state in term->ucs.
 *
 * A lead byte (0xC0 - 0xFD) starts a new sequence and returns immediately.
 * A continuation byte (0x80 - 0xBF) is accumulated into term->ucs.code; once
 * the expected number of continuation bytes has arrived, either the decoded
 * code point or REPLACEMENT_CHAR is passed to addch() and the decoder is
 * reset.  Any other byte yields REPLACEMENT_CHAR straight away.
 *
 * term: terminal holding the decoder state
 * ch:   next input byte
 */
void utf8_character(struct terminal *term, uint8_t ch)
{
	int trailing = 0; /* continuation bytes announced by a lead byte */
	int mask = 0;     /* payload bits carried by that lead byte */

	if (ch < 0x80 || ch > 0xBF) {
		/* not a continuation byte: classify it as a lead byte */
		if (0xC0 <= ch && ch <= 0xDF) {
			trailing = 1;
			mask = 0x1F;
		} else if (0xE0 <= ch && ch <= 0xEF) {
			trailing = 2;
			mask = 0x0F;
		} else if (0xF0 <= ch && ch <= 0xF7) {
			trailing = 3;
			mask = 0x07;
		} else if (0xF8 <= ch && ch <= 0xFB) {
			trailing = 4;
			mask = 0x03;
		} else if (0xFC <= ch && ch <= 0xFD) {
			trailing = 5;
			mask = 0x01;
		}

		if (trailing == 0) { /* e.g. 0xFE - 0xFF: not used in UTF-8 */
			addch(term, REPLACEMENT_CHAR);
			reset_ucs(term);
			return;
		}

		term->ucs.following_byte = trailing;
		term->ucs.count = 0;
		term->ucs.code = ch & mask;
		return;
	}

	/*
	 * Continuation byte.  Mark the sequence invalid when
	 *   - no lead byte announced it, or
	 *   - it is the first continuation byte of an overlong encoding:
	 *       2 byte sequence: lead byte must be 0xC2 or above
	 *       3 byte sequence: byte following 0xE0 must be 0xA0 ~ 0xBF
	 *       4 byte sequence: byte following 0xF0 must be 0x90 ~ 0xBF
	 *       5 byte sequence: byte following 0xF8 must be 0x88 ~ 0xBF
	 *       6 byte sequence: byte following 0xFC must be 0x84 ~ 0xBF
	 */
	if (term->ucs.following_byte == 0) {
		term->ucs.is_valid = false;
	} else if (term->ucs.count == 0) {
		switch (term->ucs.following_byte) {
		case 1:
			if (term->ucs.code <= 1) /* lead byte was 0xC0 or 0xC1 */
				term->ucs.is_valid = false;
			break;
		case 2:
			if (term->ucs.code == 0 && ch < 0xA0)
				term->ucs.is_valid = false;
			break;
		case 3:
			if (term->ucs.code == 0 && ch < 0x90)
				term->ucs.is_valid = false;
			break;
		case 4:
			if (term->ucs.code == 0 && ch < 0x88)
				term->ucs.is_valid = false;
			break;
		case 5:
			if (term->ucs.code == 0 && ch < 0x84)
				term->ucs.is_valid = false;
			break;
		default:
			break;
		}
	}

	/* append the six payload bits of this byte */
	term->ucs.code <<= 6;
	term->ucs.code += ch & 0x3F;
	term->ucs.count++;

	if (term->ucs.count < term->ucs.following_byte)
		return; /* sequence not complete yet */

	/*
	 * Sequence complete.  Code points rejected here
	 * (ref: http://www.unicode.org/reports/tr27/tr27-4.html):
	 *   0xD800   ~ 0xDFFF : surrogates
	 *   0xFDD0   ~ 0xFDEF : noncharacters
	 *   0xnFFFE  ~ 0xnFFFF: noncharacters (low 16 bits are FFFE or FFFF)
	 *   0x110000 ~        : beyond U+10FFFF
	 */
	if (!term->ucs.is_valid
		|| (term->ucs.code >= 0xD800 && term->ucs.code <= 0xDFFF)
		|| (term->ucs.code >= 0xFDD0 && term->ucs.code <= 0xFDEF)
		|| (term->ucs.code & 0xFFFE) == 0xFFFE /* matches both xFFFE and xFFFF */
		|| term->ucs.code > 0x10FFFF)
		addch(term, REPLACEMENT_CHAR);
	else
		addch(term, term->ucs.code);

	reset_ucs(term);
}

/*
 * Feed a buffer of input bytes through the terminal parser.
 *
 * Each byte is routed according to term->esc.state.  Outside an escape
 * sequence (STATE_RESET) the byte value decides:
 *   0x00 ~ 0x1F : control_character()
 *   0x20 ~ 0x7F : addch() (DEL, 0x7F, takes this path too)
 *   0x80 ~ 0xFF : utf8_character()
 * Inside an escape sequence the byte is pushed with push_esc(), and the
 * handler for the current state runs when push_esc() returns non-zero.
 *
 * term: terminal to update
 * buf:  input bytes
 * size: number of bytes in buf; nothing happens when it is <= 0
 */
void parse(struct terminal *term, uint8_t *buf, int size)
{
	int pos;

	for (pos = 0; pos < size; pos++) {
		uint8_t ch = buf[pos];

		switch (term->esc.state) {
		case STATE_RESET:
			/* a pending UTF-8 sequence interrupted by a non-continuation byte */
			if (term->ucs.following_byte > 0 && !(0x80 <= ch && ch <= 0xBF)) {
				addch(term, REPLACEMENT_CHAR);
				reset_ucs(term);
			}

			if (ch >= 0x80)
				utf8_character(term, ch);
			else if (ch >= 0x20)
				addch(term, ch);
			else
				control_character(term, ch);
			break;
		case STATE_ESC:
			if (push_esc(term, ch))
				esc_sequence(term, ch);
			break;
		case STATE_CSI:
			if (push_esc(term, ch))
				csi_sequence(term, ch);
			break;
		case STATE_OSC:
			if (push_esc(term, ch))
				osc_sequence(term, ch);
			break;
		case STATE_DCS:
			if (push_esc(term, ch))
				dcs_sequence(term, ch);
			break;
		default:
			/* unknown state: the byte is dropped */
			break;
		}
	}
}