char-1.cc 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. // { dg-require-iconv "UCS-2BE" }
  2. // { dg-require-iconv "ISO-8859-15" }
  3. // 2000-08-22 Benjamin Kosnik <bkoz@cygnus.com>
  4. // Copyright (C) 2000-2022 Free Software Foundation, Inc.
  5. //
  6. // This file is part of the GNU ISO C++ Library. This library is free
  7. // software; you can redistribute it and/or modify it under the
  8. // terms of the GNU General Public License as published by the
  9. // Free Software Foundation; either version 3, or (at your option)
  10. // any later version.
  11. // This library is distributed in the hope that it will be useful,
  12. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. // GNU General Public License for more details.
  15. // You should have received a copy of the GNU General Public License along
  16. // with this library; see the file COPYING3. If not see
  17. // <http://www.gnu.org/licenses/>.
  18. // 22.2.1.5 - Template class codecvt [lib.locale.codecvt]
  19. #include <locale>
  20. #include <cstring>
  21. #include <testsuite_hooks.h>
  22. #include <ext/codecvt_specializations.h>
  23. /*
  24. > how do I check that these conversions are correct?
  25. Very easy. Since all the characters are from ASCII you simply
  26. zero-extend the values.
  27. drepper$ echo 'black pearl jasmine tea' | od -t x1
  28. 0000000 62 6c 61 63 6b 20 70 65 61 72 6c 20 6a 61 73 6d
  29. 0000020 69 6e 65 20 74 65 61 0a
  30. So the UCS-2 string is
  31. 0x0062, 0x006c, 0x0061, ...
  32. You get the idea. With iconv() you have to take care of the
  33. byte-order, though. UCS-2 can mean little- or big endian. Looking at
  34. your result
  35. > $9 = 25856
  36. it shows that the other byte-order is used (25856 == 0x6500).
  37. */
  38. // Partial specialization using encoding_state.
  39. // codecvt<unicode_t, char, encoding_state>
  40. // UNICODE - UCS2 (big endian)
  41. void test01()
  42. {
  43. using namespace std;
  44. typedef codecvt_base::result result;
  45. typedef unsigned short int_type;
  46. typedef char ext_type;
  47. typedef __gnu_cxx::encoding_state state_type;
  48. typedef codecvt<int_type, ext_type, state_type> unicode_codecvt;
  49. typedef char_traits<int_type> int_traits;
  50. typedef char_traits<ext_type> ext_traits;
  51. const ext_type* e_lit = "black pearl jasmine tea";
  52. int size = strlen(e_lit);
  53. char i_lit_base[50] __attribute__((aligned(__alignof__(int_type)))) =
  54. {
  55. char(0x00), char(0x62), char(0x00), char(0x6c), char(0x00), char(0x61),
  56. char(0x00), char(0x63), char(0x00), char(0x6b), char(0x00), char(0x20),
  57. char(0x00), char(0x70), char(0x00), char(0x65), char(0x00), char(0x61),
  58. char(0x00), char(0x72), char(0x00), char(0x6c), char(0x00), char(0x20),
  59. char(0x00), char(0x6a), char(0x00), char(0x61), char(0x00), char(0x73),
  60. char(0x00), char(0x6d), char(0x00), char(0x69), char(0x00), char(0x6e),
  61. char(0x00), char(0x65), char(0x00), char(0x20), char(0x00), char(0x74),
  62. char(0x00), char(0x65), char(0x00), char(0x61), char(0x00), char(0xa0)
  63. };
  64. const int_type* i_lit = reinterpret_cast<int_type*>(i_lit_base);
  65. const ext_type* efrom_next;
  66. const int_type* ifrom_next;
  67. ext_type* e_arr = new ext_type[size + 1];
  68. ext_type* eto_next;
  69. int_type* i_arr = new int_type[size + 1];
  70. int_type* ito_next;
  71. // construct a locale object with the specialized facet.
  72. locale loc(locale::classic(), new unicode_codecvt);
  73. // sanity check the constructed locale has the specialized facet.
  74. VERIFY( has_facet<unicode_codecvt>(loc) );
  75. const unicode_codecvt& cvt = use_facet<unicode_codecvt>(loc);
  76. // in
  77. // unicode_codecvt::state_type state01("UCS-2BE", "ISO-8859-15", 0xfeff, 0);
  78. unicode_codecvt::state_type state01("UCS-2BE", "ISO-8859-15", 0, 0);
  79. // internal encoding is bigger because of bom
  80. result r1 = cvt.in(state01, e_lit, e_lit + size, efrom_next,
  81. i_arr, i_arr + size + 1, ito_next);
  82. VERIFY( r1 == codecvt_base::ok );
  83. VERIFY( !int_traits::compare(i_arr, i_lit, size) );
  84. VERIFY( efrom_next == e_lit + size );
  85. VERIFY( ito_next == i_arr + size );
  86. // out
  87. unicode_codecvt::state_type state02("UCS-2BE", "ISO-8859-15", 0, 0);
  88. result r2 = cvt.out(state02, i_lit, i_lit + size, ifrom_next,
  89. e_arr, e_arr + size, eto_next);
  90. VERIFY( r2 == codecvt_base::ok );
  91. VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
  92. VERIFY( ifrom_next == i_lit + size );
  93. VERIFY( eto_next == e_arr + size );
  94. // unshift
  95. ext_traits::copy(e_arr, e_lit, size);
  96. unicode_codecvt::state_type state03("UCS-2BE", "ISO-8859-15", 0, 0);
  97. result r3 = cvt.unshift(state03, e_arr, e_arr + size, eto_next);
  98. VERIFY( r3 == codecvt_base::noconv );
  99. VERIFY( !ext_traits::compare(e_arr, e_lit, size) );
  100. VERIFY( eto_next == e_arr );
  101. int i = cvt.encoding();
  102. VERIFY( i == 2 ); // Target-dependent.
  103. VERIFY( !cvt.always_noconv() );
  104. unicode_codecvt::state_type state04("UCS-2BE", "ISO-8859-15", 0, 0);
  105. int j = cvt.length(state03, e_lit, e_lit + size, 5);
  106. VERIFY( j == 5 );
  107. int k = cvt.max_length();
  108. VERIFY( k == 1 );
  109. delete [] e_arr;
  110. delete [] i_arr;
  111. }
  112. int main ()
  113. {
  114. test01();
  115. return 0;
  116. }