regenc.h - Oniguruma (regular expression library)\r
**********************************************************************/\r
/*-\r
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
+ * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>\r
* All rights reserved.\r
*\r
* Redistribution and use in source and binary forms, with or without\r
/* #define USE_CRNL_AS_LINE_TERMINATOR */\r
#define USE_UNICODE_PROPERTIES /* compile-time feature switch; the commented-out entries in this list are left undefined here */\r
#define USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER\r
+#define USE_UNICODE_WORD_BREAK /* when defined, onigenc_wb_is_break_position is declared further down in this header */\r
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */\r
/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */\r
\r
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII /* alias for ONIG_ENCODING_ASCII; NOTE(review): presumably the encoding assumed when none is given -- confirm at the use sites */\r
\r
\r
+#define ENC_SKIP_OFFSET_1_OR_0 7 /* unshifted field value for the "1 or 0" case; 7 fills all three bits of the skip-offset field */\r
+\r
#define ENC_FLAG_ASCII_COMPATIBLE (1<<0) /* bit 0 */\r
#define ENC_FLAG_UNICODE (1<<1) /* bit 1; tested against (enc)->flag by ONIGENC_IS_UNICODE_ENCODING */\r
+#define ENC_FLAG_SKIP_OFFSET_MASK (7<<2) /* bits 2-4: three-bit skip-offset field; NOTE(review): what "skip offset" measures is not shown in this excerpt */\r
+#define ENC_FLAG_SKIP_OFFSET_0 0 /* field value 0: no bits of the field set */\r
+#define ENC_FLAG_SKIP_OFFSET_1 (1<<2) /* field value 1, pre-shifted into bits 2-4 */\r
+#define ENC_FLAG_SKIP_OFFSET_2 (2<<2) /* field value 2, pre-shifted */\r
+#define ENC_FLAG_SKIP_OFFSET_3 (3<<2) /* field value 3, pre-shifted */\r
+#define ENC_FLAG_SKIP_OFFSET_4 (4<<2) /* field value 4, pre-shifted */\r
+#define ENC_FLAG_SKIP_OFFSET_1_OR_0 (ENC_SKIP_OFFSET_1_OR_0<<2) /* 7<<2: the same bit pattern as ENC_FLAG_SKIP_OFFSET_MASK */\r
+\r
+#define ENC_GET_SKIP_OFFSET(enc) \\r
+ (((enc)->flag & ENC_FLAG_SKIP_OFFSET_MASK)>>2) /* yields the unshifted field value (0..7); enc must be a pointer to something with a `flag` member */\r
\r
\r
/* for encoding system implementation (internal) */\r
extern struct PropertyNameCtype* onigenc_euc_jp_lookup_property_name P_((register const char *str, register size_t len)); /* takes a name as (str, len) and returns a PropertyNameCtype pointer; NOTE(review): result for an unknown name is not visible here -- presumably NULL, confirm in the implementation */\r
extern struct PropertyNameCtype* onigenc_sjis_lookup_property_name P_((register const char *str, register size_t len)); /* same signature as the EUC-JP lookup above */\r
\r
-/* in enc/unicode.c */\r
+/* in unicode.c */\r
extern int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));\r
extern int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]));\r
extern int onigenc_unicode_ctype_code_range P_((OnigCtype ctype, const OnigCodePoint* ranges[]));\r
extern int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));\r
extern int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));\r
extern int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));\r
+\r
extern int onigenc_egcb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* prev, const UChar* start, const UChar* end)); /* NOTE(review): "egcb" presumably = extended grapheme cluster break (cf. USE_UNICODE_EXTENDED_GRAPHEME_CLUSTER) -- confirm; declared unconditionally */\r
\r
+#ifdef USE_UNICODE_WORD_BREAK\r
+extern int onigenc_wb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* prev, const UChar* start, const UChar* end)); /* same parameter list as onigenc_egcb_is_break_position; only declared when USE_UNICODE_WORD_BREAK is defined */\r
+#endif /* USE_UNICODE_WORD_BREAK */\r
\r
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)\r
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)\r
#define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \\r
(ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\\r
ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER)) /* nonzero when code passes the UPPER or the LOWER ctype test; `code` appears twice in the expansion, so pass an expression without side effects */\r
- \r
+\r
#define ONIGENC_IS_UNICODE_ENCODING(enc) \\r
(((enc)->flag & ENC_FLAG_UNICODE) != 0) /* evaluates to 1 when the ENC_FLAG_UNICODE bit is set in enc->flag, otherwise 0 */\r
\r