Drizzled Public API Documentation

ctype-bin.cc
1 /* Copyright (C) 2002 MySQL AB & tommy@valley.ne.jp.
2 
3  This library is free software; you can redistribute it and/or
4  modify it under the terms of the GNU Library General Public
5  License as published by the Free Software Foundation; version 2
6  of the License.
7 
8  This library is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY; without even the implied warranty of
10  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  Library General Public License for more details.
12 
13  You should have received a copy of the GNU Library General Public
14  License along with this library; if not, write to the Free
15  Software Foundation, Inc., 51 Franklin Place - Suite 330, Boston,
16  MA 02110-1301, USA */
17 
18 /* This file is for binary pseudo charset, created by bar@mysql.com */
19 
20 
21 #include <config.h>
22 #include <drizzled/definitions.h>
23 #include <drizzled/internal/m_string.h>
24 #include <drizzled/charset.h>
25 
26 #include <algorithm>
27 
28 using namespace std;
29 
30 namespace drizzled {
31 
32 void my_hash_sort_bin(const charset_info_st * const,
33  const unsigned char *key, size_t len,
34  uint32_t *nr1, uint32_t *nr2);
35 
36 
/*
  Character-class table of the binary pseudo charset: one leading entry
  followed by sixteen rows of sixteen entries (257 bytes in total).

  NOTE(review): presumably entry [c + 1] holds the class bits of byte value
  c and the leading 0 is an extra slot in front of byte 0 -- confirm against
  the ctype macros in charset.h.  The row labels below give the byte range
  each row covers under that assumption.
*/
static unsigned char ctype_bin[]=
{
  0,                                                                /* leading entry */
   32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,  /* 0x00 - 0x0F */
   32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,  /* 0x10 - 0x1F */
   72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,  /* 0x20 - 0x2F */
  132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,  /* 0x30 - 0x3F */
   16,129,129,129,129,129,129,  1,  1,  1,  1,  1,  1,  1,  1,  1,  /* 0x40 - 0x4F */
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 16, 16, 16, 16, 16,  /* 0x50 - 0x5F */
   16,130,130,130,130,130,130,  2,  2,  2,  2,  2,  2,  2,  2,  2,  /* 0x60 - 0x6F */
    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 16, 16, 16, 16, 32,  /* 0x70 - 0x7F */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x80 - 0x8F */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x90 - 0x9F */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0xA0 - 0xAF */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0xB0 - 0xBF */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0xC0 - 0xCF */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0xD0 - 0xDF */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0xE0 - 0xEF */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0xF0 - 0xFF */
};
57 
58 
/*
  Identity table: entry i holds the value i for every byte 0..255.
  It is installed below as both the to_lower and the to_upper map of
  my_charset_bin, so case mapping leaves every byte unchanged.
*/
static unsigned char bin_char_array[] =
{
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,  /* 0x00 */
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,  /* 0x10 */
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,  /* 0x20 */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,  /* 0x30 */
   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,  /* 0x40 */
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,  /* 0x50 */
   96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,  /* 0x60 */
  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,  /* 0x70 */
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,  /* 0x80 */
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,  /* 0x90 */
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,  /* 0xA0 */
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,  /* 0xB0 */
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,  /* 0xC0 */
  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,  /* 0xD0 */
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,  /* 0xE0 */
  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255   /* 0xF0 */
};
80 
81 
82 int my_strnncoll_binary(const charset_info_st * const,
83  const unsigned char *s, size_t slen,
84  const unsigned char *t, size_t tlen,
85  bool t_is_prefix)
86 {
87  size_t len= min(slen,tlen);
88  int cmp= memcmp(s,t,len);
89  return cmp ? cmp : static_cast<int>((t_is_prefix ? len : slen) - tlen);
90 }
91 
92 
93 size_t my_lengthsp_binary(const charset_info_st * const,
94  const char *, size_t length)
95 {
96  return length;
97 }
98 
99 
100 /*
101  Compare two strings. Result is sign(first_argument - second_argument)
102 
103  SYNOPSIS
104  my_strnncollsp_binary()
105  cs Chararacter set
106  s String to compare
107  slen Length of 's'
108  t String to compare
109  tlen Length of 't'
110 
111  NOTE
112  This function is used for real binary strings, i.e. for
113  BLOB, BINARY(N) and VARBINARY(N).
114  It compares trailing spaces as spaces.
115 
116  RETURN
117  < 0 s < t
118  0 s == t
119  > 0 s > t
120 */
121 
122 int my_strnncollsp_binary(const charset_info_st * const cs,
123  const unsigned char *s, size_t slen,
124  const unsigned char *t, size_t tlen,
125  bool)
126 {
127  return my_strnncoll_binary(cs,s,slen,t,tlen,0);
128 }
129 
130 
131 int my_strnncoll_8bit_bin(const charset_info_st * const,
132  const unsigned char *s, size_t slen,
133  const unsigned char *t, size_t tlen,
134  bool t_is_prefix)
135 {
136  size_t len= min(slen,tlen);
137  int cmp= memcmp(s,t,len);
138  return cmp ? cmp : static_cast<int>((t_is_prefix ? len : slen) - tlen);
139 }
140 
141 
142 /*
143  Compare two strings. Result is sign(first_argument - second_argument)
144 
145  SYNOPSIS
146  my_strnncollsp_8bit_bin()
147  cs Chararacter set
148  s String to compare
149  slen Length of 's'
150  t String to compare
151  tlen Length of 't'
152  diff_if_only_endspace_difference
153  Set to 1 if the strings should be regarded as different
154  if they only difference in end space
155 
156  NOTE
157  This function is used for character strings with binary collations.
158  The shorter string is extended with end space to be as long as the longer
159  one.
160 
161  RETURN
162  < 0 s < t
163  0 s == t
164  > 0 s > t
165 */
166 
167 int my_strnncollsp_8bit_bin(const charset_info_st * const,
168  const unsigned char *a, size_t a_length,
169  const unsigned char *b, size_t b_length,
170  bool diff_if_only_endspace_difference)
171 {
172  const unsigned char *end;
173  size_t length;
174  int res;
175 
176 #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
177  diff_if_only_endspace_difference= 0;
178 #endif
179 
180  end= a + (length= min(a_length, b_length));
181  while (a < end)
182  {
183  if (*a++ != *b++)
184  return a[-1] - b[-1];
185  }
186  res= 0;
187  if (a_length != b_length)
188  {
189  int swap= 1;
190  /*
191  Check the next not space character of the longer key. If it's < ' ',
192  then it's smaller than the other key.
193  */
194  if (diff_if_only_endspace_difference)
195  res= 1; /* Assume 'a' is bigger */
196  if (a_length < b_length)
197  {
198  /* put shorter key in s */
199  a_length= b_length;
200  a= b;
201  swap= -1; /* swap sign of result */
202  res= -res;
203  }
204  for (end= a + a_length-length; a < end ; a++)
205  {
206  if (*a != ' ')
207  return (*a < ' ') ? -swap : swap;
208  }
209  }
210  return res;
211 }
212 
213 
214 /* This function is used for all conversion functions */
215 
216 size_t my_case_str_bin(const charset_info_st * const, char *)
217 {
218  return 0;
219 }
220 
221 
222 size_t my_case_bin(const charset_info_st * const, char *,
223  size_t srclen, char *, size_t)
224 {
225  return srclen;
226 }
227 
228 
229 int my_strcasecmp_bin(const charset_info_st * const,
230  const char *s, const char *t)
231 {
232  return strcmp(s,t);
233 }
234 
235 
236 uint32_t my_mbcharlen_8bit(const charset_info_st * const, uint32_t)
237 {
238  return 1;
239 }
240 
241 
242 int my_mb_wc_bin(const charset_info_st * const,
243  my_wc_t *wc, const unsigned char *str,
244  const unsigned char *end)
245 {
246  if (str >= end)
247  return MY_CS_TOOSMALL;
248 
249  *wc=str[0];
250  return 1;
251 }
252 
253 
254 int my_wc_mb_bin(const charset_info_st * const, my_wc_t wc,
255  unsigned char *str, unsigned char *end)
256 {
257  if (str >= end)
258  return MY_CS_TOOSMALL;
259 
260  if (wc < 256)
261  {
262  str[0]= wc;
263  return 1;
264  }
265  return MY_CS_ILUNI;
266 }
267 
268 
269 void my_hash_sort_8bit_bin(const charset_info_st * const,
270  const unsigned char *key, size_t len,
271  uint32_t *nr1, uint32_t *nr2)
272 {
273  const unsigned char *pos = key;
274 
275  /*
276  Remove trailing spaces. We have to do this to be able to compare
277  'A ' and 'A' as identical
278  */
279  key= internal::skip_trailing_space(key, len);
280 
281  for (; pos < key ; pos++)
282  {
283  nr1[0]^= (((nr1[0] & 63) + nr2[0]) * *pos) + (nr1[0] << 8);
284  nr2[0]+=3;
285  }
286 }
287 
288 
289 void my_hash_sort_bin(const charset_info_st * const,
290  const unsigned char *key, size_t len,
291  uint32_t *nr1, uint32_t *nr2)
292 {
293  const unsigned char *pos = key;
294 
295  key+= len;
296 
297  for (; pos < key ; pos++)
298  {
299  nr1[0]^= (((nr1[0] & 63) + nr2[0]) * *pos) + (nr1[0] << 8);
300  nr2[0]+=3;
301  }
302 }
303 
304 
/*
  The following defines are here to keep the code that follows identical
  to the one in ctype-simple.c.  For the binary charset likeconv() yields
  its second argument unchanged and INC_PTR() advances the pointer by one.
*/

#define likeconv(cs, ch) (ch)
#define INC_PTR(cs, ptr, end) (ptr)++
312 
313 
314 int my_wildcmp_bin(const charset_info_st * const cs,
315  const char *str,const char *str_end,
316  const char *wildstr,const char *wildend,
317  int escape, int w_one, int w_many)
318 {
319  int result= -1; /* Not found, using wildcards */
320 
321  while (wildstr != wildend)
322  {
323  while (*wildstr != w_many && *wildstr != w_one)
324  {
325  if (*wildstr == escape && wildstr+1 != wildend)
326  wildstr++;
327  if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
328  return 1; /* No match */
329  if (wildstr == wildend)
330  return(str != str_end); /* Match if both are at end */
331  result=1; /* Found an anchor char */
332  }
333  if (*wildstr == w_one)
334  {
335  do
336  {
337  if (str == str_end) /* Skip one char if possible */
338  return(result);
339  INC_PTR(cs,str,str_end);
340  } while (++wildstr < wildend && *wildstr == w_one);
341  if (wildstr == wildend)
342  break;
343  }
344  if (*wildstr == w_many)
345  { /* Found w_many */
346  unsigned char cmp;
347  wildstr++;
348  /* Remove any '%' and '_' from the wild search string */
349  for (; wildstr != wildend ; wildstr++)
350  {
351  if (*wildstr == w_many)
352  continue;
353  if (*wildstr == w_one)
354  {
355  if (str == str_end)
356  return(-1);
357  INC_PTR(cs,str,str_end);
358  continue;
359  }
360  break; /* Not a wild character */
361  }
362  if (wildstr == wildend)
363  return 0; /* match if w_many is last */
364  if (str == str_end)
365  return(-1);
366 
367  if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
368  cmp= *++wildstr;
369 
370  INC_PTR(cs,wildstr,wildend); /* This is compared through cmp */
371  cmp=likeconv(cs,cmp);
372  do
373  {
374  while (str != str_end && (unsigned char) likeconv(cs,*str) != cmp)
375  str++;
376  if (str++ == str_end)
377  return(-1);
378  {
379  int tmp=my_wildcmp_bin(cs,str,str_end,wildstr,wildend,escape,w_one,
380  w_many);
381  if (tmp <= 0)
382  return(tmp);
383  }
384  } while (str != str_end && wildstr[0] != w_many);
385  return(-1);
386  }
387  }
388  return(str != str_end ? 1 : 0);
389 }
390 
391 
392 size_t
393 my_strnxfrm_8bit_bin(const charset_info_st * const cs,
394  unsigned char * dst, size_t dstlen, uint32_t nweights,
395  const unsigned char *src, size_t srclen, uint32_t flags)
396 {
397  set_if_smaller(srclen, dstlen);
398  set_if_smaller(srclen, (size_t) nweights);
399  if (dst != src)
400  memcpy(dst, src, srclen);
401  return my_strxfrm_pad_desc_and_reverse(cs, dst, dst + srclen, dst + dstlen,
402  nweights - srclen, flags, 0);
403 }
404 
405 
406 uint32_t my_instr_bin(const charset_info_st * const,
407  const char *b, size_t b_length,
408  const char *s, size_t s_length,
409  my_match_t *match, uint32_t nmatch)
410 {
411  const unsigned char *str, *search, *end, *search_end;
412 
413  if (s_length <= b_length)
414  {
415  if (!s_length)
416  {
417  if (nmatch)
418  {
419  match->beg= 0;
420  match->end= 0;
421  match->mb_len= 0;
422  }
423  return 1; /* Empty string is always found */
424  }
425 
426  str= (const unsigned char*) b;
427  search= (const unsigned char*) s;
428  end= (const unsigned char*) b+b_length-s_length+1;
429  search_end= (const unsigned char*) s + s_length;
430 
431 skip:
432  while (str != end)
433  {
434  if ( (*str++) == (*search))
435  {
436  const unsigned char *i,*j;
437 
438  i= str;
439  j= search+1;
440 
441  while (j != search_end)
442  if ((*i++) != (*j++))
443  goto skip;
444 
445  if (nmatch > 0)
446  {
447  match[0].beg= 0;
448  match[0].end= (size_t) (str- (const unsigned char*)b-1);
449  match[0].mb_len= match[0].end;
450 
451  if (nmatch > 1)
452  {
453  match[1].beg= match[0].end;
454  match[1].end= match[0].end+s_length;
455  match[1].mb_len= match[1].end-match[1].beg;
456  }
457  }
458  return 2;
459  }
460  }
461  }
462  return 0;
463 }
464 
465 
466 static MY_COLLATION_HANDLER my_collation_binary_handler =
467 {
468  NULL, /* init */
469  my_strnncoll_binary,
470  my_strnncollsp_binary,
471  my_strnxfrm_8bit_bin,
472  my_strnxfrmlen_simple,
473  my_like_range_simple,
474  my_wildcmp_bin,
475  my_strcasecmp_bin,
476  my_instr_bin,
477  my_hash_sort_bin,
478  my_propagate_simple
479 };
480 
481 
482 static MY_CHARSET_HANDLER my_charset_handler=
483 {
484  NULL, /* ismbchar */
485  my_mbcharlen_8bit, /* mbcharlen */
486  my_numchars_8bit,
487  my_charpos_8bit,
488  my_well_formed_len_8bit,
489  my_lengthsp_binary,
490  my_numcells_8bit,
491  my_mb_wc_bin,
492  my_wc_mb_bin,
493  my_mb_ctype_8bit,
494  my_case_str_bin,
495  my_case_str_bin,
496  my_case_bin,
497  my_case_bin,
498  my_snprintf_8bit,
499  my_long10_to_str_8bit,
500  my_int64_t10_to_str_8bit,
501  my_fill_8bit,
502  my_strntol_8bit,
503  my_strntoul_8bit,
504  my_strntoll_8bit,
505  my_strntoull_8bit,
506  my_strntod_8bit,
507  my_strtoll10_8bit,
508  my_strntoull10rnd_8bit,
509  my_scan_8bit
510 };
511 
512 
513 DRIZZLED_API charset_info_st my_charset_bin =
514 {
515  63,0,0, /* number */
516  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PRIMARY,/* state */
517  "binary", /* cs name */
518  "binary", /* name */
519  "", /* comment */
520  NULL, /* tailoring */
521  ctype_bin, /* ctype */
522  bin_char_array, /* to_lower */
523  bin_char_array, /* to_upper */
524  NULL, /* sort_order */
525  NULL, /* contractions */
526  NULL, /* sort_order_big*/
527  NULL, /* tab_to_uni */
528  NULL, /* tab_from_uni */
529  my_unicase_default, /* caseinfo */
530  NULL, /* state_map */
531  NULL, /* ident_map */
532  1, /* strxfrm_multiply */
533  1, /* caseup_multiply */
534  1, /* casedn_multiply */
535  1, /* mbminlen */
536  1, /* mbmaxlen */
537  0, /* min_sort_char */
538  255, /* max_sort_char */
539  0, /* pad char */
540  1, /* levels_for_compare */
541  1, /* levels_for_order */
542  &my_charset_handler,
543  &my_collation_binary_handler
544 };
545 
546 } /* namespace drizzled */
#define DRIZZLED_API
Definition: visibility.h:62