54 'printjoins' => array(46, 45, 95, 58, 47, 39),
55 'casesensitive' => FALSE,
57 'removeChars' => array(45)
80 $this->debugString =
'';
82 if (!$this->lexerConf[
'casesensitive']) {
83 $wordString = $this->csObj->conv_case(
'utf-8', $wordString,
'toLower');
90 $this->debugString =
'';
92 list($start, $len) = $this->
get_word($wordString, $pos);
94 $this->
addWords($words, $wordString, $start, $len);
96 $this->debugString .=
'<span style="color:red">' . htmlspecialchars(substr($wordString, $pos, ($start - $pos))) .
'</span>' . htmlspecialchars(substr($wordString, $start, $len));
122 public function addWords(&$words, &$wordString, $start, $len) {
124 $theWord = substr($wordString, $start, $len);
127 $cp = $this->
utf8_ord($theWord, $bc);
128 list($cType) = $this->
charType($cp);
141 if ($cType ==
'cjk') {
143 $strlen = $this->csObj->utf8_strlen($theWord);
145 for ($a = 0; $a < $strlen; $a++) {
146 if ($strlen == 1 || $a < $strlen - 1) {
147 $words[] = $this->csObj->utf8_substr($theWord, $a, 2);
153 foreach ($this->lexerConf[
'removeChars'] as $skipJoin) {
154 $theWord = str_replace($this->csObj->UnumberToChar($skipJoin),
'', $theWord);
173 return array($pos, $len);
177 if ($str[$pos] ==
'') {
182 return array($pos, $len);
198 $cType = ($cType_prev = FALSE);
202 if ($str[$pos] ==
'') {
213 if (!in_array($cp, $this->lexerConf[
'printjoins'])) {
216 $len = $printJoinLgd;
221 if (!$printJoinLgd) {
222 $printJoinLgd = $len;
229 } elseif (!$letter && $cType) {
236 if ($str[$pos] ==
'') {
241 $cp = $this->
utf8_ord($str, $bc, $pos);
244 $cType_prev = $cType;
245 list($cType) = $this->
charType($cp);
266 if ($cp >= 48 && $cp <= 57) {
270 if ($cp >= 65 && $cp <= 90 || $cp >= 97 && $cp <= 122 || $cp >= 192 && $cp <= 255 && $cp != 215 && $cp != 247 || $cp >= 256 && $cp < 640 || ($cp == 902 || $cp >= 904 && $cp < 1024) || ($cp >= 1024 && $cp < 1154 || $cp >= 1162 && $cp < 1328) || ($cp >= 1424 && $cp < 1456 || $cp >= 1488 && $cp < 1523) || ($cp >= 1569 && $cp <= 1624 || $cp >= 1646 && $cp <= 1747) || $cp >= 7680 && $cp < 8192) {
271 return array(
'alpha');
276 if ($cp >= 12352 && $cp <= 12543 || $cp >= 12592 && $cp <= 12687 || $cp >= 13312 && $cp <= 19903 || $cp >= 19968 && $cp <= 40879 || $cp >= 44032 && $cp <= 55215 || $cp >= 131072 && $cp <= 195103) {
291 public function utf8_ord(&$str, &$len, $pos = 0, $hex = FALSE) {
292 $ord = ord($str[$pos]);
295 for ($bc = -1, $mbs = $ord; $mbs & 128; $mbs = $mbs << 1) {
300 $ord = $ord & (1 << 6 - $bc) - 1;
303 for ($i = $pos + 1; $bc; $bc--, $i++) {
304 $ord = $ord << 6 | ord($str[$i]) & 63;
307 return $hex ?
'x' . dechex($ord) : $ord;
static makeInstance($className)
utf8_ord(&$str, &$len, $pos=0, $hex=FALSE)
utf8_is_letter(&$str, &$len, $pos=0)
addWords(&$words, &$wordString, $start, $len)
debug($variable='', $name=' *variable *', $line=' *line *', $file=' *file *', $recursiveDepth=3, $debugLevel=E_DEBUG)
static inList($list, $item)