bytes_test.go 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package bytes_test
  5. import (
  6. . "bytes"
  7. "fmt"
  8. "internal/testenv"
  9. "math/rand"
  10. "reflect"
  11. "runtime"
  12. "strings"
  13. "testing"
  14. "unicode"
  15. "unicode/utf8"
  16. )
  17. func eq(a, b []string) bool {
  18. if len(a) != len(b) {
  19. return false
  20. }
  21. for i := 0; i < len(a); i++ {
  22. if a[i] != b[i] {
  23. return false
  24. }
  25. }
  26. return true
  27. }
  28. func sliceOfString(s [][]byte) []string {
  29. result := make([]string, len(s))
  30. for i, v := range s {
  31. result[i] = string(v)
  32. }
  33. return result
  34. }
  35. // For ease of reading, the test cases use strings that are converted to byte
  36. // slices before invoking the functions.
  37. var abcd = "abcd"
  38. var faces = "☺☻☹"
  39. var commas = "1,2,3,4"
  40. var dots = "1....2....3....4"
  41. type BinOpTest struct {
  42. a string
  43. b string
  44. i int
  45. }
  46. func TestEqual(t *testing.T) {
  47. // Run the tests and check for allocation at the same time.
  48. allocs := testing.AllocsPerRun(10, func() {
  49. for _, tt := range compareTests {
  50. eql := Equal(tt.a, tt.b)
  51. if eql != (tt.i == 0) {
  52. t.Errorf(`Equal(%q, %q) = %v`, tt.a, tt.b, eql)
  53. }
  54. }
  55. })
  56. if allocs > 0 {
  57. t.Errorf("Equal allocated %v times", allocs)
  58. }
  59. }
  60. func TestEqualExhaustive(t *testing.T) {
  61. var size = 128
  62. if testing.Short() {
  63. size = 32
  64. }
  65. a := make([]byte, size)
  66. b := make([]byte, size)
  67. b_init := make([]byte, size)
  68. // randomish but deterministic data
  69. for i := 0; i < size; i++ {
  70. a[i] = byte(17 * i)
  71. b_init[i] = byte(23*i + 100)
  72. }
  73. for len := 0; len <= size; len++ {
  74. for x := 0; x <= size-len; x++ {
  75. for y := 0; y <= size-len; y++ {
  76. copy(b, b_init)
  77. copy(b[y:y+len], a[x:x+len])
  78. if !Equal(a[x:x+len], b[y:y+len]) || !Equal(b[y:y+len], a[x:x+len]) {
  79. t.Errorf("Equal(%d, %d, %d) = false", len, x, y)
  80. }
  81. }
  82. }
  83. }
  84. }
  85. // make sure Equal returns false for minimally different strings. The data
  86. // is all zeros except for a single one in one location.
  87. func TestNotEqual(t *testing.T) {
  88. var size = 128
  89. if testing.Short() {
  90. size = 32
  91. }
  92. a := make([]byte, size)
  93. b := make([]byte, size)
  94. for len := 0; len <= size; len++ {
  95. for x := 0; x <= size-len; x++ {
  96. for y := 0; y <= size-len; y++ {
  97. for diffpos := x; diffpos < x+len; diffpos++ {
  98. a[diffpos] = 1
  99. if Equal(a[x:x+len], b[y:y+len]) || Equal(b[y:y+len], a[x:x+len]) {
  100. t.Errorf("NotEqual(%d, %d, %d, %d) = true", len, x, y, diffpos)
  101. }
  102. a[diffpos] = 0
  103. }
  104. }
  105. }
  106. }
  107. }
// indexTests is the table for Index: a is the slice searched, b the
// separator, and i the expected index of the first occurrence of b in a
// (-1 when absent). TestIndexByte also walks this table, using only the
// entries whose b is a single byte.
var indexTests = []BinOpTest{
	{"", "", 0},
	{"", "a", -1},
	{"", "foo", -1},
	{"fo", "foo", -1},
	{"foo", "baz", -1},
	{"foo", "foo", 0},
	{"oofofoofooo", "f", 2},
	{"oofofoofooo", "foo", 4},
	{"barfoobarfoo", "foo", 3},
	{"foo", "", 0},
	{"foo", "o", 1},
	{"abcABCabc", "A", 3},
	// cases with one byte strings - test IndexByte and special case in Index()
	{"", "a", -1},
	{"x", "a", -1},
	{"x", "x", 0},
	{"abc", "a", 0},
	{"abc", "b", 1},
	{"abc", "c", 2},
	{"abc", "x", -1},
	{"barfoobarfooyyyzzzyyyzzzyyyzzzyyyxxxzzzyyy", "x", 33},
	{"foofyfoobarfoobar", "y", 4},
	{"oooooooooooooooooooooo", "r", -1},
	{"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22},
	{"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1},
	// test fallback to Rabin-Karp.
	{"000000000000000000000000000000000000000000000000000000000000000000000001", "0000000000000000000000000000000000000000000000000000000000000000001", 5},
}
// lastIndexTests is the table for LastIndex: a is the slice searched, b the
// separator, and i the expected index of the last occurrence of b in a
// (-1 when absent).
var lastIndexTests = []BinOpTest{
	{"", "", 0},
	{"", "a", -1},
	{"", "foo", -1},
	{"fo", "foo", -1},
	{"foo", "foo", 0},
	{"foo", "f", 0},
	{"oofofoofooo", "f", 7},
	{"oofofoofooo", "foo", 7},
	{"barfoobarfoo", "foo", 9},
	// An empty separator matches at the end of the slice.
	{"foo", "", 3},
	{"foo", "o", 2},
	{"abcABCabc", "A", 3},
	{"abcABCabc", "a", 6},
}
// indexAnyTests is the table for IndexAny: a is the slice searched, b the
// set of characters to look for, and i the expected byte index of the first
// match (-1 when none). Several entries use non-ASCII runes and invalid
// UTF-8 bytes (\x80, \xff, \xcf).
var indexAnyTests = []BinOpTest{
	{"", "", -1},
	{"", "a", -1},
	{"", "abc", -1},
	{"a", "", -1},
	{"a", "a", 0},
	{"\x80", "\xffb", 0},
	{"aaa", "a", 0},
	{"abc", "xyz", -1},
	{"abc", "xcz", 2},
	{"ab☺c", "x☺yz", 2},
	{"a☺b☻c☹d", "cx", len("a☺b☻")},
	{"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
	{"aRegExp*", ".(|)*+?^$[]", 7},
	{dots + dots + dots, " ", -1},
	{"012abcba210", "\xffb", 4},
	{"012\x80bcb\x80210", "\xffb", 3},
	{"0123456\xcf\x80abc", "\xcfb\x80", 10},
}
// lastIndexAnyTests is the table for LastIndexAny: a is the slice searched,
// b the set of characters to look for, and i the expected byte index of the
// last match (-1 when none). It mirrors indexAnyTests, including the
// entries with non-ASCII runes and invalid UTF-8 bytes.
var lastIndexAnyTests = []BinOpTest{
	{"", "", -1},
	{"", "a", -1},
	{"", "abc", -1},
	{"a", "", -1},
	{"a", "a", 0},
	{"\x80", "\xffb", 0},
	{"aaa", "a", 2},
	{"abc", "xyz", -1},
	{"abc", "ab", 1},
	{"ab☺c", "x☺yz", 2},
	{"a☺b☻c☹d", "cx", len("a☺b☻")},
	{"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
	{"a.RegExp*", ".(|)*+?^$[]", 8},
	{dots + dots + dots, " ", -1},
	{"012abcba210", "\xffb", 6},
	{"012\x80bcb\x80210", "\xffb", 7},
	{"0123456\xcf\x80abc", "\xcfb\x80", 10},
}
  190. // Execute f on each test case. funcName should be the name of f; it's used
  191. // in failure reports.
  192. func runIndexTests(t *testing.T, f func(s, sep []byte) int, funcName string, testCases []BinOpTest) {
  193. for _, test := range testCases {
  194. a := []byte(test.a)
  195. b := []byte(test.b)
  196. actual := f(a, b)
  197. if actual != test.i {
  198. t.Errorf("%s(%q,%q) = %v; want %v", funcName, a, b, actual, test.i)
  199. }
  200. }
  201. var allocTests = []struct {
  202. a []byte
  203. b []byte
  204. i int
  205. }{
  206. // case for function Index.
  207. {[]byte("000000000000000000000000000000000000000000000000000000000000000000000001"), []byte("0000000000000000000000000000000000000000000000000000000000000000001"), 5},
  208. // case for function LastIndex.
  209. {[]byte("000000000000000000000000000000000000000000000000000000000000000010000"), []byte("00000000000000000000000000000000000000000000000000000000000001"), 3},
  210. }
  211. allocs := testing.AllocsPerRun(100, func() {
  212. if i := Index(allocTests[1].a, allocTests[1].b); i != allocTests[1].i {
  213. t.Errorf("Index([]byte(%q), []byte(%q)) = %v; want %v", allocTests[1].a, allocTests[1].b, i, allocTests[1].i)
  214. }
  215. if i := LastIndex(allocTests[0].a, allocTests[0].b); i != allocTests[0].i {
  216. t.Errorf("LastIndex([]byte(%q), []byte(%q)) = %v; want %v", allocTests[0].a, allocTests[0].b, i, allocTests[0].i)
  217. }
  218. })
  219. if allocs != 0 {
  220. t.Errorf("expected no allocations, got %f", allocs)
  221. }
  222. }
  223. func runIndexAnyTests(t *testing.T, f func(s []byte, chars string) int, funcName string, testCases []BinOpTest) {
  224. for _, test := range testCases {
  225. a := []byte(test.a)
  226. actual := f(a, test.b)
  227. if actual != test.i {
  228. t.Errorf("%s(%q,%q) = %v; want %v", funcName, a, test.b, actual, test.i)
  229. }
  230. }
  231. }
  232. func TestIndex(t *testing.T) { runIndexTests(t, Index, "Index", indexTests) }
  233. func TestLastIndex(t *testing.T) { runIndexTests(t, LastIndex, "LastIndex", lastIndexTests) }
  234. func TestIndexAny(t *testing.T) { runIndexAnyTests(t, IndexAny, "IndexAny", indexAnyTests) }
  235. func TestLastIndexAny(t *testing.T) {
  236. runIndexAnyTests(t, LastIndexAny, "LastIndexAny", lastIndexAnyTests)
  237. }
  238. func TestIndexByte(t *testing.T) {
  239. for _, tt := range indexTests {
  240. if len(tt.b) != 1 {
  241. continue
  242. }
  243. a := []byte(tt.a)
  244. b := tt.b[0]
  245. pos := IndexByte(a, b)
  246. if pos != tt.i {
  247. t.Errorf(`IndexByte(%q, '%c') = %v`, tt.a, b, pos)
  248. }
  249. posp := IndexBytePortable(a, b)
  250. if posp != tt.i {
  251. t.Errorf(`indexBytePortable(%q, '%c') = %v`, tt.a, b, posp)
  252. }
  253. }
  254. }
  255. func TestLastIndexByte(t *testing.T) {
  256. testCases := []BinOpTest{
  257. {"", "q", -1},
  258. {"abcdef", "q", -1},
  259. {"abcdefabcdef", "a", len("abcdef")}, // something in the middle
  260. {"abcdefabcdef", "f", len("abcdefabcde")}, // last byte
  261. {"zabcdefabcdef", "z", 0}, // first byte
  262. {"a☺b☻c☹d", "b", len("a☺")}, // non-ascii
  263. }
  264. for _, test := range testCases {
  265. actual := LastIndexByte([]byte(test.a), test.b[0])
  266. if actual != test.i {
  267. t.Errorf("LastIndexByte(%q,%c) = %v; want %v", test.a, test.b[0], actual, test.i)
  268. }
  269. }
  270. }
  271. // test a larger buffer with different sizes and alignments
  272. func TestIndexByteBig(t *testing.T) {
  273. var n = 1024
  274. if testing.Short() {
  275. n = 128
  276. }
  277. b := make([]byte, n)
  278. for i := 0; i < n; i++ {
  279. // different start alignments
  280. b1 := b[i:]
  281. for j := 0; j < len(b1); j++ {
  282. b1[j] = 'x'
  283. pos := IndexByte(b1, 'x')
  284. if pos != j {
  285. t.Errorf("IndexByte(%q, 'x') = %v", b1, pos)
  286. }
  287. b1[j] = 0
  288. pos = IndexByte(b1, 'x')
  289. if pos != -1 {
  290. t.Errorf("IndexByte(%q, 'x') = %v", b1, pos)
  291. }
  292. }
  293. // different end alignments
  294. b1 = b[:i]
  295. for j := 0; j < len(b1); j++ {
  296. b1[j] = 'x'
  297. pos := IndexByte(b1, 'x')
  298. if pos != j {
  299. t.Errorf("IndexByte(%q, 'x') = %v", b1, pos)
  300. }
  301. b1[j] = 0
  302. pos = IndexByte(b1, 'x')
  303. if pos != -1 {
  304. t.Errorf("IndexByte(%q, 'x') = %v", b1, pos)
  305. }
  306. }
  307. // different start and end alignments
  308. b1 = b[i/2 : n-(i+1)/2]
  309. for j := 0; j < len(b1); j++ {
  310. b1[j] = 'x'
  311. pos := IndexByte(b1, 'x')
  312. if pos != j {
  313. t.Errorf("IndexByte(%q, 'x') = %v", b1, pos)
  314. }
  315. b1[j] = 0
  316. pos = IndexByte(b1, 'x')
  317. if pos != -1 {
  318. t.Errorf("IndexByte(%q, 'x') = %v", b1, pos)
  319. }
  320. }
  321. }
  322. }
  323. // test a small index across all page offsets
  324. func TestIndexByteSmall(t *testing.T) {
  325. b := make([]byte, 5015) // bigger than a page
  326. // Make sure we find the correct byte even when straddling a page.
  327. for i := 0; i <= len(b)-15; i++ {
  328. for j := 0; j < 15; j++ {
  329. b[i+j] = byte(100 + j)
  330. }
  331. for j := 0; j < 15; j++ {
  332. p := IndexByte(b[i:i+15], byte(100+j))
  333. if p != j {
  334. t.Errorf("IndexByte(%q, %d) = %d", b[i:i+15], 100+j, p)
  335. }
  336. }
  337. for j := 0; j < 15; j++ {
  338. b[i+j] = 0
  339. }
  340. }
  341. // Make sure matches outside the slice never trigger.
  342. for i := 0; i <= len(b)-15; i++ {
  343. for j := 0; j < 15; j++ {
  344. b[i+j] = 1
  345. }
  346. for j := 0; j < 15; j++ {
  347. p := IndexByte(b[i:i+15], byte(0))
  348. if p != -1 {
  349. t.Errorf("IndexByte(%q, %d) = %d", b[i:i+15], 0, p)
  350. }
  351. }
  352. for j := 0; j < 15; j++ {
  353. b[i+j] = 0
  354. }
  355. }
  356. }
  357. func TestIndexRune(t *testing.T) {
  358. tests := []struct {
  359. in string
  360. rune rune
  361. want int
  362. }{
  363. {"", 'a', -1},
  364. {"", '☺', -1},
  365. {"foo", '☹', -1},
  366. {"foo", 'o', 1},
  367. {"foo☺bar", '☺', 3},
  368. {"foo☺☻☹bar", '☹', 9},
  369. {"a A x", 'A', 2},
  370. {"some_text=some_value", '=', 9},
  371. {"☺a", 'a', 3},
  372. {"a☻☺b", '☺', 4},
  373. // RuneError should match any invalid UTF-8 byte sequence.
  374. {"�", '�', 0},
  375. {"\xff", '�', 0},
  376. {"☻x�", '�', len("☻x")},
  377. {"☻x\xe2\x98", '�', len("☻x")},
  378. {"☻x\xe2\x98�", '�', len("☻x")},
  379. {"☻x\xe2\x98x", '�', len("☻x")},
  380. // Invalid rune values should never match.
  381. {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", -1, -1},
  382. {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", 0xD800, -1}, // Surrogate pair
  383. {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", utf8.MaxRune + 1, -1},
  384. }
  385. for _, tt := range tests {
  386. if got := IndexRune([]byte(tt.in), tt.rune); got != tt.want {
  387. t.Errorf("IndexRune(%q, %d) = %v; want %v", tt.in, tt.rune, got, tt.want)
  388. }
  389. }
  390. haystack := []byte("test世界")
  391. allocs := testing.AllocsPerRun(1000, func() {
  392. if i := IndexRune(haystack, 's'); i != 2 {
  393. t.Fatalf("'s' at %d; want 2", i)
  394. }
  395. if i := IndexRune(haystack, '世'); i != 4 {
  396. t.Fatalf("'世' at %d; want 4", i)
  397. }
  398. })
  399. if allocs != 0 {
  400. if runtime.Compiler == "gccgo" {
  401. t.Log("does not work on gccgo without better escape analysis")
  402. } else {
  403. t.Errorf("expected no allocations, got %f", allocs)
  404. }
  405. }
  406. }
  407. // test count of a single byte across page offsets
  408. func TestCountByte(t *testing.T) {
  409. b := make([]byte, 5015) // bigger than a page
  410. windows := []int{1, 2, 3, 4, 15, 16, 17, 31, 32, 33, 63, 64, 65, 128}
  411. testCountWindow := func(i, window int) {
  412. for j := 0; j < window; j++ {
  413. b[i+j] = byte(100)
  414. p := Count(b[i:i+window], []byte{100})
  415. if p != j+1 {
  416. t.Errorf("TestCountByte.Count(%q, 100) = %d", b[i:i+window], p)
  417. }
  418. }
  419. }
  420. maxWnd := windows[len(windows)-1]
  421. for i := 0; i <= 2*maxWnd; i++ {
  422. for _, window := range windows {
  423. if window > len(b[i:]) {
  424. window = len(b[i:])
  425. }
  426. testCountWindow(i, window)
  427. for j := 0; j < window; j++ {
  428. b[i+j] = byte(0)
  429. }
  430. }
  431. }
  432. for i := 4096 - (maxWnd + 1); i < len(b); i++ {
  433. for _, window := range windows {
  434. if window > len(b[i:]) {
  435. window = len(b[i:])
  436. }
  437. testCountWindow(i, window)
  438. for j := 0; j < window; j++ {
  439. b[i+j] = byte(0)
  440. }
  441. }
  442. }
  443. }
  444. // Make sure we don't count bytes outside our window
  445. func TestCountByteNoMatch(t *testing.T) {
  446. b := make([]byte, 5015)
  447. windows := []int{1, 2, 3, 4, 15, 16, 17, 31, 32, 33, 63, 64, 65, 128}
  448. for i := 0; i <= len(b); i++ {
  449. for _, window := range windows {
  450. if window > len(b[i:]) {
  451. window = len(b[i:])
  452. }
  453. // Fill the window with non-match
  454. for j := 0; j < window; j++ {
  455. b[i+j] = byte(100)
  456. }
  457. // Try to find something that doesn't exist
  458. p := Count(b[i:i+window], []byte{0})
  459. if p != 0 {
  460. t.Errorf("TestCountByteNoMatch(%q, 0) = %d", b[i:i+window], p)
  461. }
  462. for j := 0; j < window; j++ {
  463. b[i+j] = byte(0)
  464. }
  465. }
  466. }
  467. }
// bmbuf is the scratch buffer shared by the benchmarks in this file.
// benchBytes grows it to at least n bytes before running a benchmark of
// size n, and bmEqual grows it to 2*n. Benchmarks reset the bytes they set
// so the buffer is all zeros between runs.
var bmbuf []byte
  469. func valName(x int) string {
  470. if s := x >> 20; s<<20 == x {
  471. return fmt.Sprintf("%dM", s)
  472. }
  473. if s := x >> 10; s<<10 == x {
  474. return fmt.Sprintf("%dK", s)
  475. }
  476. return fmt.Sprint(x)
  477. }
  478. func benchBytes(b *testing.B, sizes []int, f func(b *testing.B, n int)) {
  479. for _, n := range sizes {
  480. if isRaceBuilder && n > 4<<10 {
  481. continue
  482. }
  483. b.Run(valName(n), func(b *testing.B) {
  484. if len(bmbuf) < n {
  485. bmbuf = make([]byte, n)
  486. }
  487. b.SetBytes(int64(n))
  488. f(b, n)
  489. })
  490. }
  491. }
// indexSizes lists the buffer sizes, in bytes, used by the index and count
// benchmarks in this file.
var indexSizes = []int{10, 32, 4 << 10, 4 << 20, 64 << 20}

// isRaceBuilder is true when the testenv builder name ends in "-race".
// benchBytes uses it to skip sizes above 4<<10.
var isRaceBuilder = strings.HasSuffix(testenv.Builder(), "-race")
  494. func BenchmarkIndexByte(b *testing.B) {
  495. benchBytes(b, indexSizes, bmIndexByte(IndexByte))
  496. }
  497. func BenchmarkIndexBytePortable(b *testing.B) {
  498. benchBytes(b, indexSizes, bmIndexByte(IndexBytePortable))
  499. }
  500. func bmIndexByte(index func([]byte, byte) int) func(b *testing.B, n int) {
  501. return func(b *testing.B, n int) {
  502. buf := bmbuf[0:n]
  503. buf[n-1] = 'x'
  504. for i := 0; i < b.N; i++ {
  505. j := index(buf, 'x')
  506. if j != n-1 {
  507. b.Fatal("bad index", j)
  508. }
  509. }
  510. buf[n-1] = '\x00'
  511. }
  512. }
  513. func BenchmarkIndexRune(b *testing.B) {
  514. benchBytes(b, indexSizes, bmIndexRune(IndexRune))
  515. }
  516. func BenchmarkIndexRuneASCII(b *testing.B) {
  517. benchBytes(b, indexSizes, bmIndexRuneASCII(IndexRune))
  518. }
  519. func bmIndexRuneASCII(index func([]byte, rune) int) func(b *testing.B, n int) {
  520. return func(b *testing.B, n int) {
  521. buf := bmbuf[0:n]
  522. buf[n-1] = 'x'
  523. for i := 0; i < b.N; i++ {
  524. j := index(buf, 'x')
  525. if j != n-1 {
  526. b.Fatal("bad index", j)
  527. }
  528. }
  529. buf[n-1] = '\x00'
  530. }
  531. }
  532. func bmIndexRune(index func([]byte, rune) int) func(b *testing.B, n int) {
  533. return func(b *testing.B, n int) {
  534. buf := bmbuf[0:n]
  535. utf8.EncodeRune(buf[n-3:], '世')
  536. for i := 0; i < b.N; i++ {
  537. j := index(buf, '世')
  538. if j != n-3 {
  539. b.Fatal("bad index", j)
  540. }
  541. }
  542. buf[n-3] = '\x00'
  543. buf[n-2] = '\x00'
  544. buf[n-1] = '\x00'
  545. }
  546. }
  547. func BenchmarkEqual(b *testing.B) {
  548. b.Run("0", func(b *testing.B) {
  549. var buf [4]byte
  550. buf1 := buf[0:0]
  551. buf2 := buf[1:1]
  552. for i := 0; i < b.N; i++ {
  553. eq := Equal(buf1, buf2)
  554. if !eq {
  555. b.Fatal("bad equal")
  556. }
  557. }
  558. })
  559. sizes := []int{1, 6, 9, 15, 16, 20, 32, 4 << 10, 4 << 20, 64 << 20}
  560. benchBytes(b, sizes, bmEqual(Equal))
  561. }
  562. func bmEqual(equal func([]byte, []byte) bool) func(b *testing.B, n int) {
  563. return func(b *testing.B, n int) {
  564. if len(bmbuf) < 2*n {
  565. bmbuf = make([]byte, 2*n)
  566. }
  567. buf1 := bmbuf[0:n]
  568. buf2 := bmbuf[n : 2*n]
  569. buf1[n-1] = 'x'
  570. buf2[n-1] = 'x'
  571. for i := 0; i < b.N; i++ {
  572. eq := equal(buf1, buf2)
  573. if !eq {
  574. b.Fatal("bad equal")
  575. }
  576. }
  577. buf1[n-1] = '\x00'
  578. buf2[n-1] = '\x00'
  579. }
  580. }
  581. func BenchmarkIndex(b *testing.B) {
  582. benchBytes(b, indexSizes, func(b *testing.B, n int) {
  583. buf := bmbuf[0:n]
  584. buf[n-1] = 'x'
  585. for i := 0; i < b.N; i++ {
  586. j := Index(buf, buf[n-7:])
  587. if j != n-7 {
  588. b.Fatal("bad index", j)
  589. }
  590. }
  591. buf[n-1] = '\x00'
  592. })
  593. }
  594. func BenchmarkIndexEasy(b *testing.B) {
  595. benchBytes(b, indexSizes, func(b *testing.B, n int) {
  596. buf := bmbuf[0:n]
  597. buf[n-1] = 'x'
  598. buf[n-7] = 'x'
  599. for i := 0; i < b.N; i++ {
  600. j := Index(buf, buf[n-7:])
  601. if j != n-7 {
  602. b.Fatal("bad index", j)
  603. }
  604. }
  605. buf[n-1] = '\x00'
  606. buf[n-7] = '\x00'
  607. })
  608. }
  609. func BenchmarkCount(b *testing.B) {
  610. benchBytes(b, indexSizes, func(b *testing.B, n int) {
  611. buf := bmbuf[0:n]
  612. buf[n-1] = 'x'
  613. for i := 0; i < b.N; i++ {
  614. j := Count(buf, buf[n-7:])
  615. if j != 1 {
  616. b.Fatal("bad count", j)
  617. }
  618. }
  619. buf[n-1] = '\x00'
  620. })
  621. }
  622. func BenchmarkCountEasy(b *testing.B) {
  623. benchBytes(b, indexSizes, func(b *testing.B, n int) {
  624. buf := bmbuf[0:n]
  625. buf[n-1] = 'x'
  626. buf[n-7] = 'x'
  627. for i := 0; i < b.N; i++ {
  628. j := Count(buf, buf[n-7:])
  629. if j != 1 {
  630. b.Fatal("bad count", j)
  631. }
  632. }
  633. buf[n-1] = '\x00'
  634. buf[n-7] = '\x00'
  635. })
  636. }
  637. func BenchmarkCountSingle(b *testing.B) {
  638. benchBytes(b, indexSizes, func(b *testing.B, n int) {
  639. buf := bmbuf[0:n]
  640. step := 8
  641. for i := 0; i < len(buf); i += step {
  642. buf[i] = 1
  643. }
  644. expect := (len(buf) + (step - 1)) / step
  645. for i := 0; i < b.N; i++ {
  646. j := Count(buf, []byte{1})
  647. if j != expect {
  648. b.Fatal("bad count", j, expect)
  649. }
  650. }
  651. for i := 0; i < len(buf); i++ {
  652. buf[i] = 0
  653. }
  654. })
  655. }
  656. type SplitTest struct {
  657. s string
  658. sep string
  659. n int
  660. a []string
  661. }
// splittests is the table for TestSplit. Each entry gives the input, the
// separator, the n passed to SplitN and the expected pieces. It covers an
// empty separator, a separator that is absent, n == 0 (nil result), a
// positive n that limits the number of pieces, and multi-byte runes.
var splittests = []SplitTest{
	{"", "", -1, []string{}},
	{abcd, "a", 0, nil},
	{abcd, "", 2, []string{"a", "bcd"}},
	{abcd, "a", -1, []string{"", "bcd"}},
	{abcd, "z", -1, []string{"abcd"}},
	{abcd, "", -1, []string{"a", "b", "c", "d"}},
	{commas, ",", -1, []string{"1", "2", "3", "4"}},
	{dots, "...", -1, []string{"1", ".2", ".3", ".4"}},
	{faces, "☹", -1, []string{"☺☻", ""}},
	{faces, "~", -1, []string{faces}},
	{faces, "", -1, []string{"☺", "☻", "☹"}},
	{"1 2 3 4", " ", 3, []string{"1", "2", "3 4"}},
	{"1 2", " ", 3, []string{"1", "2"}},
	{"123", "", 2, []string{"1", "23"}},
	{"123", "", 17, []string{"1", "2", "3"}},
}
  679. func TestSplit(t *testing.T) {
  680. for _, tt := range splittests {
  681. a := SplitN([]byte(tt.s), []byte(tt.sep), tt.n)
  682. // Appending to the results should not change future results.
  683. var x []byte
  684. for _, v := range a {
  685. x = append(v, 'z')
  686. }
  687. result := sliceOfString(a)
  688. if !eq(result, tt.a) {
  689. t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a)
  690. continue
  691. }
  692. if tt.n == 0 || len(a) == 0 {
  693. continue
  694. }
  695. if want := tt.a[len(tt.a)-1] + "z"; string(x) != want {
  696. t.Errorf("last appended result was %s; want %s", x, want)
  697. }
  698. s := Join(a, []byte(tt.sep))
  699. if string(s) != tt.s {
  700. t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s)
  701. }
  702. if tt.n < 0 {
  703. b := Split([]byte(tt.s), []byte(tt.sep))
  704. if !reflect.DeepEqual(a, b) {
  705. t.Errorf("Split disagrees withSplitN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
  706. }
  707. }
  708. if len(a) > 0 {
  709. in, out := a[0], s
  710. if cap(in) == cap(out) && &in[:1][0] == &out[:1][0] {
  711. t.Errorf("Join(%#v, %q) didn't copy", a, tt.sep)
  712. }
  713. }
  714. }
  715. }
// splitaftertests is the table for TestSplitAfter. Each entry gives the
// input, the separator, the n passed to SplitAfterN and the expected pieces,
// where every piece except possibly the last keeps its trailing separator.
var splitaftertests = []SplitTest{
	{abcd, "a", -1, []string{"a", "bcd"}},
	{abcd, "z", -1, []string{"abcd"}},
	{abcd, "", -1, []string{"a", "b", "c", "d"}},
	{commas, ",", -1, []string{"1,", "2,", "3,", "4"}},
	{dots, "...", -1, []string{"1...", ".2...", ".3...", ".4"}},
	{faces, "☹", -1, []string{"☺☻☹", ""}},
	{faces, "~", -1, []string{faces}},
	{faces, "", -1, []string{"☺", "☻", "☹"}},
	{"1 2 3 4", " ", 3, []string{"1 ", "2 ", "3 4"}},
	{"1 2 3", " ", 3, []string{"1 ", "2 ", "3"}},
	{"1 2", " ", 3, []string{"1 ", "2"}},
	{"123", "", 2, []string{"1", "23"}},
	{"123", "", 17, []string{"1", "2", "3"}},
}
  731. func TestSplitAfter(t *testing.T) {
  732. for _, tt := range splitaftertests {
  733. a := SplitAfterN([]byte(tt.s), []byte(tt.sep), tt.n)
  734. // Appending to the results should not change future results.
  735. var x []byte
  736. for _, v := range a {
  737. x = append(v, 'z')
  738. }
  739. result := sliceOfString(a)
  740. if !eq(result, tt.a) {
  741. t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a)
  742. continue
  743. }
  744. if want := tt.a[len(tt.a)-1] + "z"; string(x) != want {
  745. t.Errorf("last appended result was %s; want %s", x, want)
  746. }
  747. s := Join(a, nil)
  748. if string(s) != tt.s {
  749. t.Errorf(`Join(Split(%q, %q, %d), %q) = %q`, tt.s, tt.sep, tt.n, tt.sep, s)
  750. }
  751. if tt.n < 0 {
  752. b := SplitAfter([]byte(tt.s), []byte(tt.sep))
  753. if !reflect.DeepEqual(a, b) {
  754. t.Errorf("SplitAfter disagrees withSplitAfterN(%q, %q, %d) = %v; want %v", tt.s, tt.sep, tt.n, b, a)
  755. }
  756. }
  757. }
  758. }
// FieldsTest is one case for Fields and FieldsFunc: s is the input (written
// as a string for readability) and a holds the expected fields.
type FieldsTest struct {
	s string
	a []string
}
// fieldstests is the table shared by TestFields and the unicode.IsSpace part
// of TestFieldsFunc. It covers empty and all-whitespace input, ASCII spaces
// and tabs, Unicode spaces (U+2000..U+2002) and non-ASCII field content.
var fieldstests = []FieldsTest{
	{"", []string{}},
	{" ", []string{}},
	{" \t ", []string{}},
	{" abc ", []string{"abc"}},
	{"1 2 3 4", []string{"1", "2", "3", "4"}},
	// NOTE(review): this entry is identical to the previous one as it
	// appears here; possibly runs of spaces were collapsed when the file
	// was extracted. Confirm against the upstream source.
	{"1 2 3 4", []string{"1", "2", "3", "4"}},
	{"1\t\t2\t\t3\t4", []string{"1", "2", "3", "4"}},
	{"1\u20002\u20013\u20024", []string{"1", "2", "3", "4"}},
	{"\u2000\u2001\u2002", []string{}},
	{"\n™\t™\n", []string{"™", "™"}},
	{faces, []string{faces}},
}
  776. func TestFields(t *testing.T) {
  777. for _, tt := range fieldstests {
  778. b := []byte(tt.s)
  779. a := Fields(b)
  780. // Appending to the results should not change future results.
  781. var x []byte
  782. for _, v := range a {
  783. x = append(v, 'z')
  784. }
  785. result := sliceOfString(a)
  786. if !eq(result, tt.a) {
  787. t.Errorf("Fields(%q) = %v; want %v", tt.s, a, tt.a)
  788. continue
  789. }
  790. if string(b) != tt.s {
  791. t.Errorf("slice changed to %s; want %s", string(b), tt.s)
  792. }
  793. if len(tt.a) > 0 {
  794. if want := tt.a[len(tt.a)-1] + "z"; string(x) != want {
  795. t.Errorf("last appended result was %s; want %s", x, want)
  796. }
  797. }
  798. }
  799. }
  800. func TestFieldsFunc(t *testing.T) {
  801. for _, tt := range fieldstests {
  802. a := FieldsFunc([]byte(tt.s), unicode.IsSpace)
  803. result := sliceOfString(a)
  804. if !eq(result, tt.a) {
  805. t.Errorf("FieldsFunc(%q, unicode.IsSpace) = %v; want %v", tt.s, a, tt.a)
  806. continue
  807. }
  808. }
  809. pred := func(c rune) bool { return c == 'X' }
  810. var fieldsFuncTests = []FieldsTest{
  811. {"", []string{}},
  812. {"XX", []string{}},
  813. {"XXhiXXX", []string{"hi"}},
  814. {"aXXbXXXcX", []string{"a", "b", "c"}},
  815. }
  816. for _, tt := range fieldsFuncTests {
  817. b := []byte(tt.s)
  818. a := FieldsFunc(b, pred)
  819. // Appending to the results should not change future results.
  820. var x []byte
  821. for _, v := range a {
  822. x = append(v, 'z')
  823. }
  824. result := sliceOfString(a)
  825. if !eq(result, tt.a) {
  826. t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a)
  827. }
  828. if string(b) != tt.s {
  829. t.Errorf("slice changed to %s; want %s", b, tt.s)
  830. }
  831. if len(tt.a) > 0 {
  832. if want := tt.a[len(tt.a)-1] + "z"; string(x) != want {
  833. t.Errorf("last appended result was %s; want %s", x, want)
  834. }
  835. }
  836. }
  837. }
  838. // Test case for any function which accepts and returns a byte slice.
  839. // For ease of creation, we write the input byte slice as a string.
  840. type StringTest struct {
  841. in string
  842. out []byte
  843. }
  844. var upperTests = []StringTest{
  845. {"", []byte("")},
  846. {"ONLYUPPER", []byte("ONLYUPPER")},
  847. {"abc", []byte("ABC")},
  848. {"AbC123", []byte("ABC123")},
  849. {"azAZ09_", []byte("AZAZ09_")},
  850. {"longStrinGwitHmixofsmaLLandcAps", []byte("LONGSTRINGWITHMIXOFSMALLANDCAPS")},
  851. {"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", []byte("LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS")},
  852. {"\u0250\u0250\u0250\u0250\u0250", []byte("\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F")}, // grows one byte per char
  853. {"a\u0080\U0010FFFF", []byte("A\u0080\U0010FFFF")}, // test utf8.RuneSelf and utf8.MaxRune
  854. }
  855. var lowerTests = []StringTest{
  856. {"", []byte("")},
  857. {"abc", []byte("abc")},
  858. {"AbC123", []byte("abc123")},
  859. {"azAZ09_", []byte("azaz09_")},
  860. {"longStrinGwitHmixofsmaLLandcAps", []byte("longstringwithmixofsmallandcaps")},
  861. {"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", []byte("long\u0250string\u0250with\u0250nonascii\u0250chars")},
  862. {"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", []byte("\u0251\u0251\u0251\u0251\u0251")}, // shrinks one byte per char
  863. {"A\u0080\U0010FFFF", []byte("a\u0080\U0010FFFF")}, // test utf8.RuneSelf and utf8.MaxRune
  864. }
  865. const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"
  866. var trimSpaceTests = []StringTest{
  867. {"", nil},
  868. {" a", []byte("a")},
  869. {"b ", []byte("b")},
  870. {"abc", []byte("abc")},
  871. {space + "abc" + space, []byte("abc")},
  872. {" ", nil},
  873. {"\u3000 ", nil},
  874. {" \u3000", nil},
  875. {" \t\r\n \t\t\r\r\n\n ", nil},
  876. {" \t\r\n x\t\t\r\r\n\n ", []byte("x")},
  877. {" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", []byte("x\t\t\r\r\ny")},
  878. {"1 \t\r\n2", []byte("1 \t\r\n2")},
  879. {" x\x80", []byte("x\x80")},
  880. {" x\xc0", []byte("x\xc0")},
  881. {"x \xc0\xc0 ", []byte("x \xc0\xc0")},
  882. {"x \xc0", []byte("x \xc0")},
  883. {"x \xc0 ", []byte("x \xc0")},
  884. {"x \xc0\xc0 ", []byte("x \xc0\xc0")},
  885. {"x ☺\xc0\xc0 ", []byte("x ☺\xc0\xc0")},
  886. {"x ☺ ", []byte("x ☺")},
  887. }
  888. // Execute f on each test case. funcName should be the name of f; it's used
  889. // in failure reports.
  890. func runStringTests(t *testing.T, f func([]byte) []byte, funcName string, testCases []StringTest) {
  891. for _, tc := range testCases {
  892. actual := f([]byte(tc.in))
  893. if actual == nil && tc.out != nil {
  894. t.Errorf("%s(%q) = nil; want %q", funcName, tc.in, tc.out)
  895. }
  896. if actual != nil && tc.out == nil {
  897. t.Errorf("%s(%q) = %q; want nil", funcName, tc.in, actual)
  898. }
  899. if !Equal(actual, tc.out) {
  900. t.Errorf("%s(%q) = %q; want %q", funcName, tc.in, actual, tc.out)
  901. }
  902. }
  903. }
  904. func tenRunes(r rune) string {
  905. runes := make([]rune, 10)
  906. for i := range runes {
  907. runes[i] = r
  908. }
  909. return string(runes)
  910. }
  911. // User-defined self-inverse mapping function
  912. func rot13(r rune) rune {
  913. const step = 13
  914. if r >= 'a' && r <= 'z' {
  915. return ((r - 'a' + step) % 26) + 'a'
  916. }
  917. if r >= 'A' && r <= 'Z' {
  918. return ((r - 'A' + step) % 26) + 'A'
  919. }
  920. return r
  921. }
  922. func TestMap(t *testing.T) {
  923. // Run a couple of awful growth/shrinkage tests
  924. a := tenRunes('a')
  925. // 1. Grow. This triggers two reallocations in Map.
  926. maxRune := func(r rune) rune { return unicode.MaxRune }
  927. m := Map(maxRune, []byte(a))
  928. expect := tenRunes(unicode.MaxRune)
  929. if string(m) != expect {
  930. t.Errorf("growing: expected %q got %q", expect, m)
  931. }
  932. // 2. Shrink
  933. minRune := func(r rune) rune { return 'a' }
  934. m = Map(minRune, []byte(tenRunes(unicode.MaxRune)))
  935. expect = a
  936. if string(m) != expect {
  937. t.Errorf("shrinking: expected %q got %q", expect, m)
  938. }
  939. // 3. Rot13
  940. m = Map(rot13, []byte("a to zed"))
  941. expect = "n gb mrq"
  942. if string(m) != expect {
  943. t.Errorf("rot13: expected %q got %q", expect, m)
  944. }
  945. // 4. Rot13^2
  946. m = Map(rot13, Map(rot13, []byte("a to zed")))
  947. expect = "a to zed"
  948. if string(m) != expect {
  949. t.Errorf("rot13: expected %q got %q", expect, m)
  950. }
  951. // 5. Drop
  952. dropNotLatin := func(r rune) rune {
  953. if unicode.Is(unicode.Latin, r) {
  954. return r
  955. }
  956. return -1
  957. }
  958. m = Map(dropNotLatin, []byte("Hello, 세계"))
  959. expect = "Hello"
  960. if string(m) != expect {
  961. t.Errorf("drop: expected %q got %q", expect, m)
  962. }
  963. // 6. Invalid rune
  964. invalidRune := func(r rune) rune {
  965. return utf8.MaxRune + 1
  966. }
  967. m = Map(invalidRune, []byte("x"))
  968. expect = "\uFFFD"
  969. if string(m) != expect {
  970. t.Errorf("invalidRune: expected %q got %q", expect, m)
  971. }
  972. }
  973. func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }
  974. func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) }
  975. func BenchmarkToUpper(b *testing.B) {
  976. for _, tc := range upperTests {
  977. tin := []byte(tc.in)
  978. b.Run(tc.in, func(b *testing.B) {
  979. for i := 0; i < b.N; i++ {
  980. actual := ToUpper(tin)
  981. if !Equal(actual, tc.out) {
  982. b.Errorf("ToUpper(%q) = %q; want %q", tc.in, actual, tc.out)
  983. }
  984. }
  985. })
  986. }
  987. }
  988. func BenchmarkToLower(b *testing.B) {
  989. for _, tc := range lowerTests {
  990. tin := []byte(tc.in)
  991. b.Run(tc.in, func(b *testing.B) {
  992. for i := 0; i < b.N; i++ {
  993. actual := ToLower(tin)
  994. if !Equal(actual, tc.out) {
  995. b.Errorf("ToLower(%q) = %q; want %q", tc.in, actual, tc.out)
  996. }
  997. }
  998. })
  999. }
  1000. }
  1001. var toValidUTF8Tests = []struct {
  1002. in string
  1003. repl string
  1004. out string
  1005. }{
  1006. {"", "\uFFFD", ""},
  1007. {"abc", "\uFFFD", "abc"},
  1008. {"\uFDDD", "\uFFFD", "\uFDDD"},
  1009. {"a\xffb", "\uFFFD", "a\uFFFDb"},
  1010. {"a\xffb\uFFFD", "X", "aXb\uFFFD"},
  1011. {"a☺\xffb☺\xC0\xAFc☺\xff", "", "a☺b☺c☺"},
  1012. {"a☺\xffb☺\xC0\xAFc☺\xff", "日本語", "a☺日本語b☺日本語c☺日本語"},
  1013. {"\xC0\xAF", "\uFFFD", "\uFFFD"},
  1014. {"\xE0\x80\xAF", "\uFFFD", "\uFFFD"},
  1015. {"\xed\xa0\x80", "abc", "abc"},
  1016. {"\xed\xbf\xbf", "\uFFFD", "\uFFFD"},
  1017. {"\xF0\x80\x80\xaf", "☺", "☺"},
  1018. {"\xF8\x80\x80\x80\xAF", "\uFFFD", "\uFFFD"},
  1019. {"\xFC\x80\x80\x80\x80\xAF", "\uFFFD", "\uFFFD"},
  1020. }
  1021. func TestToValidUTF8(t *testing.T) {
  1022. for _, tc := range toValidUTF8Tests {
  1023. got := ToValidUTF8([]byte(tc.in), []byte(tc.repl))
  1024. if !Equal(got, []byte(tc.out)) {
  1025. t.Errorf("ToValidUTF8(%q, %q) = %q; want %q", tc.in, tc.repl, got, tc.out)
  1026. }
  1027. }
  1028. }
  1029. func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) }
  1030. type RepeatTest struct {
  1031. in, out string
  1032. count int
  1033. }
  1034. var RepeatTests = []RepeatTest{
  1035. {"", "", 0},
  1036. {"", "", 1},
  1037. {"", "", 2},
  1038. {"-", "", 0},
  1039. {"-", "-", 1},
  1040. {"-", "----------", 10},
  1041. {"abc ", "abc abc abc ", 3},
  1042. }
  1043. func TestRepeat(t *testing.T) {
  1044. for _, tt := range RepeatTests {
  1045. tin := []byte(tt.in)
  1046. tout := []byte(tt.out)
  1047. a := Repeat(tin, tt.count)
  1048. if !Equal(a, tout) {
  1049. t.Errorf("Repeat(%q, %d) = %q; want %q", tin, tt.count, a, tout)
  1050. continue
  1051. }
  1052. }
  1053. }
  1054. func repeat(b []byte, count int) (err error) {
  1055. defer func() {
  1056. if r := recover(); r != nil {
  1057. switch v := r.(type) {
  1058. case error:
  1059. err = v
  1060. default:
  1061. err = fmt.Errorf("%s", v)
  1062. }
  1063. }
  1064. }()
  1065. Repeat(b, count)
  1066. return
  1067. }
  1068. // See Issue golang.org/issue/16237
  1069. func TestRepeatCatchesOverflow(t *testing.T) {
  1070. tests := [...]struct {
  1071. s string
  1072. count int
  1073. errStr string
  1074. }{
  1075. 0: {"--", -2147483647, "negative"},
  1076. 1: {"", int(^uint(0) >> 1), ""},
  1077. 2: {"-", 10, ""},
  1078. 3: {"gopher", 0, ""},
  1079. 4: {"-", -1, "negative"},
  1080. 5: {"--", -102, "negative"},
  1081. 6: {string(make([]byte, 255)), int((^uint(0))/255 + 1), "overflow"},
  1082. }
  1083. for i, tt := range tests {
  1084. err := repeat([]byte(tt.s), tt.count)
  1085. if tt.errStr == "" {
  1086. if err != nil {
  1087. t.Errorf("#%d panicked %v", i, err)
  1088. }
  1089. continue
  1090. }
  1091. if err == nil || !strings.Contains(err.Error(), tt.errStr) {
  1092. t.Errorf("#%d expected %q got %q", i, tt.errStr, err)
  1093. }
  1094. }
  1095. }
  1096. func runesEqual(a, b []rune) bool {
  1097. if len(a) != len(b) {
  1098. return false
  1099. }
  1100. for i, r := range a {
  1101. if r != b[i] {
  1102. return false
  1103. }
  1104. }
  1105. return true
  1106. }
  1107. type RunesTest struct {
  1108. in string
  1109. out []rune
  1110. lossy bool
  1111. }
  1112. var RunesTests = []RunesTest{
  1113. {"", []rune{}, false},
  1114. {" ", []rune{32}, false},
  1115. {"ABC", []rune{65, 66, 67}, false},
  1116. {"abc", []rune{97, 98, 99}, false},
  1117. {"\u65e5\u672c\u8a9e", []rune{26085, 26412, 35486}, false},
  1118. {"ab\x80c", []rune{97, 98, 0xFFFD, 99}, true},
  1119. {"ab\xc0c", []rune{97, 98, 0xFFFD, 99}, true},
  1120. }
  1121. func TestRunes(t *testing.T) {
  1122. for _, tt := range RunesTests {
  1123. tin := []byte(tt.in)
  1124. a := Runes(tin)
  1125. if !runesEqual(a, tt.out) {
  1126. t.Errorf("Runes(%q) = %v; want %v", tin, a, tt.out)
  1127. continue
  1128. }
  1129. if !tt.lossy {
  1130. // can only test reassembly if we didn't lose information
  1131. s := string(a)
  1132. if s != tt.in {
  1133. t.Errorf("string(Runes(%q)) = %x; want %x", tin, s, tin)
  1134. }
  1135. }
  1136. }
  1137. }
  1138. type TrimTest struct {
  1139. f string
  1140. in, arg, out string
  1141. }
  1142. var trimTests = []TrimTest{
  1143. {"Trim", "abba", "a", "bb"},
  1144. {"Trim", "abba", "ab", ""},
  1145. {"TrimLeft", "abba", "ab", ""},
  1146. {"TrimRight", "abba", "ab", ""},
  1147. {"TrimLeft", "abba", "a", "bba"},
  1148. {"TrimLeft", "abba", "b", "abba"},
  1149. {"TrimRight", "abba", "a", "abb"},
  1150. {"TrimRight", "abba", "b", "abba"},
  1151. {"Trim", "<tag>", "<>", "tag"},
  1152. {"Trim", "* listitem", " *", "listitem"},
  1153. {"Trim", `"quote"`, `"`, "quote"},
  1154. {"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"},
  1155. {"Trim", "\x80test\xff", "\xff", "test"},
  1156. {"Trim", " Ġ ", " ", "Ġ"},
  1157. {"Trim", " Ġİ0", "0 ", "Ġİ"},
  1158. //empty string tests
  1159. {"Trim", "abba", "", "abba"},
  1160. {"Trim", "", "123", ""},
  1161. {"Trim", "", "", ""},
  1162. {"TrimLeft", "abba", "", "abba"},
  1163. {"TrimLeft", "", "123", ""},
  1164. {"TrimLeft", "", "", ""},
  1165. {"TrimRight", "abba", "", "abba"},
  1166. {"TrimRight", "", "123", ""},
  1167. {"TrimRight", "", "", ""},
  1168. {"TrimRight", "☺\xc0", "☺", "☺\xc0"},
  1169. {"TrimPrefix", "aabb", "a", "abb"},
  1170. {"TrimPrefix", "aabb", "b", "aabb"},
  1171. {"TrimSuffix", "aabb", "a", "aabb"},
  1172. {"TrimSuffix", "aabb", "b", "aab"},
  1173. }
  1174. func TestTrim(t *testing.T) {
  1175. for _, tc := range trimTests {
  1176. name := tc.f
  1177. var f func([]byte, string) []byte
  1178. var fb func([]byte, []byte) []byte
  1179. switch name {
  1180. case "Trim":
  1181. f = Trim
  1182. case "TrimLeft":
  1183. f = TrimLeft
  1184. case "TrimRight":
  1185. f = TrimRight
  1186. case "TrimPrefix":
  1187. fb = TrimPrefix
  1188. case "TrimSuffix":
  1189. fb = TrimSuffix
  1190. default:
  1191. t.Errorf("Undefined trim function %s", name)
  1192. }
  1193. var actual string
  1194. if f != nil {
  1195. actual = string(f([]byte(tc.in), tc.arg))
  1196. } else {
  1197. actual = string(fb([]byte(tc.in), []byte(tc.arg)))
  1198. }
  1199. if actual != tc.out {
  1200. t.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.arg, actual, tc.out)
  1201. }
  1202. }
  1203. }
  1204. type predicate struct {
  1205. f func(r rune) bool
  1206. name string
  1207. }
  1208. var isSpace = predicate{unicode.IsSpace, "IsSpace"}
  1209. var isDigit = predicate{unicode.IsDigit, "IsDigit"}
  1210. var isUpper = predicate{unicode.IsUpper, "IsUpper"}
  1211. var isValidRune = predicate{
  1212. func(r rune) bool {
  1213. return r != utf8.RuneError
  1214. },
  1215. "IsValidRune",
  1216. }
  1217. type TrimFuncTest struct {
  1218. f predicate
  1219. in string
  1220. trimOut []byte
  1221. leftOut []byte
  1222. rightOut []byte
  1223. }
  1224. func not(p predicate) predicate {
  1225. return predicate{
  1226. func(r rune) bool {
  1227. return !p.f(r)
  1228. },
  1229. "not " + p.name,
  1230. }
  1231. }
  1232. var trimFuncTests = []TrimFuncTest{
  1233. {isSpace, space + " hello " + space,
  1234. []byte("hello"),
  1235. []byte("hello " + space),
  1236. []byte(space + " hello")},
  1237. {isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51",
  1238. []byte("hello"),
  1239. []byte("hello34\u0e50\u0e51"),
  1240. []byte("\u0e50\u0e5212hello")},
  1241. {isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F",
  1242. []byte("hello"),
  1243. []byte("helloEF\u2C6F\u2C6FGH\u2C6F\u2C6F"),
  1244. []byte("\u2C6F\u2C6F\u2C6F\u2C6FABCDhello")},
  1245. {not(isSpace), "hello" + space + "hello",
  1246. []byte(space),
  1247. []byte(space + "hello"),
  1248. []byte("hello" + space)},
  1249. {not(isDigit), "hello\u0e50\u0e521234\u0e50\u0e51helo",
  1250. []byte("\u0e50\u0e521234\u0e50\u0e51"),
  1251. []byte("\u0e50\u0e521234\u0e50\u0e51helo"),
  1252. []byte("hello\u0e50\u0e521234\u0e50\u0e51")},
  1253. {isValidRune, "ab\xc0a\xc0cd",
  1254. []byte("\xc0a\xc0"),
  1255. []byte("\xc0a\xc0cd"),
  1256. []byte("ab\xc0a\xc0")},
  1257. {not(isValidRune), "\xc0a\xc0",
  1258. []byte("a"),
  1259. []byte("a\xc0"),
  1260. []byte("\xc0a")},
  1261. // The nils returned by TrimLeftFunc are odd behavior, but we need
  1262. // to preserve backwards compatibility.
  1263. {isSpace, "",
  1264. nil,
  1265. nil,
  1266. []byte("")},
  1267. {isSpace, " ",
  1268. nil,
  1269. nil,
  1270. []byte("")},
  1271. }
  1272. func TestTrimFunc(t *testing.T) {
  1273. for _, tc := range trimFuncTests {
  1274. trimmers := []struct {
  1275. name string
  1276. trim func(s []byte, f func(r rune) bool) []byte
  1277. out []byte
  1278. }{
  1279. {"TrimFunc", TrimFunc, tc.trimOut},
  1280. {"TrimLeftFunc", TrimLeftFunc, tc.leftOut},
  1281. {"TrimRightFunc", TrimRightFunc, tc.rightOut},
  1282. }
  1283. for _, trimmer := range trimmers {
  1284. actual := trimmer.trim([]byte(tc.in), tc.f.f)
  1285. if actual == nil && trimmer.out != nil {
  1286. t.Errorf("%s(%q, %q) = nil; want %q", trimmer.name, tc.in, tc.f.name, trimmer.out)
  1287. }
  1288. if actual != nil && trimmer.out == nil {
  1289. t.Errorf("%s(%q, %q) = %q; want nil", trimmer.name, tc.in, tc.f.name, actual)
  1290. }
  1291. if !Equal(actual, trimmer.out) {
  1292. t.Errorf("%s(%q, %q) = %q; want %q", trimmer.name, tc.in, tc.f.name, actual, trimmer.out)
  1293. }
  1294. }
  1295. }
  1296. }
  1297. type IndexFuncTest struct {
  1298. in string
  1299. f predicate
  1300. first, last int
  1301. }
  1302. var indexFuncTests = []IndexFuncTest{
  1303. {"", isValidRune, -1, -1},
  1304. {"abc", isDigit, -1, -1},
  1305. {"0123", isDigit, 0, 3},
  1306. {"a1b", isDigit, 1, 1},
  1307. {space, isSpace, 0, len(space) - 3}, // last rune in space is 3 bytes
  1308. {"\u0e50\u0e5212hello34\u0e50\u0e51", isDigit, 0, 18},
  1309. {"\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", isUpper, 0, 34},
  1310. {"12\u0e50\u0e52hello34\u0e50\u0e51", not(isDigit), 8, 12},
  1311. // tests of invalid UTF-8
  1312. {"\x801", isDigit, 1, 1},
  1313. {"\x80abc", isDigit, -1, -1},
  1314. {"\xc0a\xc0", isValidRune, 1, 1},
  1315. {"\xc0a\xc0", not(isValidRune), 0, 2},
  1316. {"\xc0☺\xc0", not(isValidRune), 0, 4},
  1317. {"\xc0☺\xc0\xc0", not(isValidRune), 0, 5},
  1318. {"ab\xc0a\xc0cd", not(isValidRune), 2, 4},
  1319. {"a\xe0\x80cd", not(isValidRune), 1, 2},
  1320. }
  1321. func TestIndexFunc(t *testing.T) {
  1322. for _, tc := range indexFuncTests {
  1323. first := IndexFunc([]byte(tc.in), tc.f.f)
  1324. if first != tc.first {
  1325. t.Errorf("IndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, first, tc.first)
  1326. }
  1327. last := LastIndexFunc([]byte(tc.in), tc.f.f)
  1328. if last != tc.last {
  1329. t.Errorf("LastIndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, last, tc.last)
  1330. }
  1331. }
  1332. }
  1333. type ReplaceTest struct {
  1334. in string
  1335. old, new string
  1336. n int
  1337. out string
  1338. }
  1339. var ReplaceTests = []ReplaceTest{
  1340. {"hello", "l", "L", 0, "hello"},
  1341. {"hello", "l", "L", -1, "heLLo"},
  1342. {"hello", "x", "X", -1, "hello"},
  1343. {"", "x", "X", -1, ""},
  1344. {"radar", "r", "<r>", -1, "<r>ada<r>"},
  1345. {"", "", "<>", -1, "<>"},
  1346. {"banana", "a", "<>", -1, "b<>n<>n<>"},
  1347. {"banana", "a", "<>", 1, "b<>nana"},
  1348. {"banana", "a", "<>", 1000, "b<>n<>n<>"},
  1349. {"banana", "an", "<>", -1, "b<><>a"},
  1350. {"banana", "ana", "<>", -1, "b<>na"},
  1351. {"banana", "", "<>", -1, "<>b<>a<>n<>a<>n<>a<>"},
  1352. {"banana", "", "<>", 10, "<>b<>a<>n<>a<>n<>a<>"},
  1353. {"banana", "", "<>", 6, "<>b<>a<>n<>a<>n<>a"},
  1354. {"banana", "", "<>", 5, "<>b<>a<>n<>a<>na"},
  1355. {"banana", "", "<>", 1, "<>banana"},
  1356. {"banana", "a", "a", -1, "banana"},
  1357. {"banana", "a", "a", 1, "banana"},
  1358. {"☺☻☹", "", "<>", -1, "<>☺<>☻<>☹<>"},
  1359. }
  1360. func TestReplace(t *testing.T) {
  1361. for _, tt := range ReplaceTests {
  1362. in := append([]byte(tt.in), "<spare>"...)
  1363. in = in[:len(tt.in)]
  1364. out := Replace(in, []byte(tt.old), []byte(tt.new), tt.n)
  1365. if s := string(out); s != tt.out {
  1366. t.Errorf("Replace(%q, %q, %q, %d) = %q, want %q", tt.in, tt.old, tt.new, tt.n, s, tt.out)
  1367. }
  1368. if cap(in) == cap(out) && &in[:1][0] == &out[:1][0] {
  1369. t.Errorf("Replace(%q, %q, %q, %d) didn't copy", tt.in, tt.old, tt.new, tt.n)
  1370. }
  1371. if tt.n == -1 {
  1372. out := ReplaceAll(in, []byte(tt.old), []byte(tt.new))
  1373. if s := string(out); s != tt.out {
  1374. t.Errorf("ReplaceAll(%q, %q, %q) = %q, want %q", tt.in, tt.old, tt.new, s, tt.out)
  1375. }
  1376. }
  1377. }
  1378. }
  1379. type TitleTest struct {
  1380. in, out string
  1381. }
  1382. var TitleTests = []TitleTest{
  1383. {"", ""},
  1384. {"a", "A"},
  1385. {" aaa aaa aaa ", " Aaa Aaa Aaa "},
  1386. {" Aaa Aaa Aaa ", " Aaa Aaa Aaa "},
  1387. {"123a456", "123a456"},
  1388. {"double-blind", "Double-Blind"},
  1389. {"ÿøû", "Ÿøû"},
  1390. {"with_underscore", "With_underscore"},
  1391. {"unicode \xe2\x80\xa8 line separator", "Unicode \xe2\x80\xa8 Line Separator"},
  1392. }
  1393. func TestTitle(t *testing.T) {
  1394. for _, tt := range TitleTests {
  1395. if s := string(Title([]byte(tt.in))); s != tt.out {
  1396. t.Errorf("Title(%q) = %q, want %q", tt.in, s, tt.out)
  1397. }
  1398. }
  1399. }
  1400. var ToTitleTests = []TitleTest{
  1401. {"", ""},
  1402. {"a", "A"},
  1403. {" aaa aaa aaa ", " AAA AAA AAA "},
  1404. {" Aaa Aaa Aaa ", " AAA AAA AAA "},
  1405. {"123a456", "123A456"},
  1406. {"double-blind", "DOUBLE-BLIND"},
  1407. {"ÿøû", "ŸØÛ"},
  1408. }
  1409. func TestToTitle(t *testing.T) {
  1410. for _, tt := range ToTitleTests {
  1411. if s := string(ToTitle([]byte(tt.in))); s != tt.out {
  1412. t.Errorf("ToTitle(%q) = %q, want %q", tt.in, s, tt.out)
  1413. }
  1414. }
  1415. }
  1416. var EqualFoldTests = []struct {
  1417. s, t string
  1418. out bool
  1419. }{
  1420. {"abc", "abc", true},
  1421. {"ABcd", "ABcd", true},
  1422. {"123abc", "123ABC", true},
  1423. {"αβδ", "ΑΒΔ", true},
  1424. {"abc", "xyz", false},
  1425. {"abc", "XYZ", false},
  1426. {"abcdefghijk", "abcdefghijX", false},
  1427. {"abcdefghijk", "abcdefghij\u212A", true},
  1428. {"abcdefghijK", "abcdefghij\u212A", true},
  1429. {"abcdefghijkz", "abcdefghij\u212Ay", false},
  1430. {"abcdefghijKz", "abcdefghij\u212Ay", false},
  1431. }
  1432. func TestEqualFold(t *testing.T) {
  1433. for _, tt := range EqualFoldTests {
  1434. if out := EqualFold([]byte(tt.s), []byte(tt.t)); out != tt.out {
  1435. t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.s, tt.t, out, tt.out)
  1436. }
  1437. if out := EqualFold([]byte(tt.t), []byte(tt.s)); out != tt.out {
  1438. t.Errorf("EqualFold(%#q, %#q) = %v, want %v", tt.t, tt.s, out, tt.out)
  1439. }
  1440. }
  1441. }
  1442. var cutTests = []struct {
  1443. s, sep string
  1444. before, after string
  1445. found bool
  1446. }{
  1447. {"abc", "b", "a", "c", true},
  1448. {"abc", "a", "", "bc", true},
  1449. {"abc", "c", "ab", "", true},
  1450. {"abc", "abc", "", "", true},
  1451. {"abc", "", "", "abc", true},
  1452. {"abc", "d", "abc", "", false},
  1453. {"", "d", "", "", false},
  1454. {"", "", "", "", true},
  1455. }
  1456. func TestCut(t *testing.T) {
  1457. for _, tt := range cutTests {
  1458. if before, after, found := Cut([]byte(tt.s), []byte(tt.sep)); string(before) != tt.before || string(after) != tt.after || found != tt.found {
  1459. t.Errorf("Cut(%q, %q) = %q, %q, %v, want %q, %q, %v", tt.s, tt.sep, before, after, found, tt.before, tt.after, tt.found)
  1460. }
  1461. }
  1462. }
  1463. func TestBufferGrowNegative(t *testing.T) {
  1464. defer func() {
  1465. if err := recover(); err == nil {
  1466. t.Fatal("Grow(-1) should have panicked")
  1467. }
  1468. }()
  1469. var b Buffer
  1470. b.Grow(-1)
  1471. }
  1472. func TestBufferTruncateNegative(t *testing.T) {
  1473. defer func() {
  1474. if err := recover(); err == nil {
  1475. t.Fatal("Truncate(-1) should have panicked")
  1476. }
  1477. }()
  1478. var b Buffer
  1479. b.Truncate(-1)
  1480. }
  1481. func TestBufferTruncateOutOfRange(t *testing.T) {
  1482. defer func() {
  1483. if err := recover(); err == nil {
  1484. t.Fatal("Truncate(20) should have panicked")
  1485. }
  1486. }()
  1487. var b Buffer
  1488. b.Write(make([]byte, 10))
  1489. b.Truncate(20)
  1490. }
  1491. var containsTests = []struct {
  1492. b, subslice []byte
  1493. want bool
  1494. }{
  1495. {[]byte("hello"), []byte("hel"), true},
  1496. {[]byte("日本語"), []byte("日本"), true},
  1497. {[]byte("hello"), []byte("Hello, world"), false},
  1498. {[]byte("東京"), []byte("京東"), false},
  1499. }
  1500. func TestContains(t *testing.T) {
  1501. for _, tt := range containsTests {
  1502. if got := Contains(tt.b, tt.subslice); got != tt.want {
  1503. t.Errorf("Contains(%q, %q) = %v, want %v", tt.b, tt.subslice, got, tt.want)
  1504. }
  1505. }
  1506. }
  1507. var ContainsAnyTests = []struct {
  1508. b []byte
  1509. substr string
  1510. expected bool
  1511. }{
  1512. {[]byte(""), "", false},
  1513. {[]byte(""), "a", false},
  1514. {[]byte(""), "abc", false},
  1515. {[]byte("a"), "", false},
  1516. {[]byte("a"), "a", true},
  1517. {[]byte("aaa"), "a", true},
  1518. {[]byte("abc"), "xyz", false},
  1519. {[]byte("abc"), "xcz", true},
  1520. {[]byte("a☺b☻c☹d"), "uvw☻xyz", true},
  1521. {[]byte("aRegExp*"), ".(|)*+?^$[]", true},
  1522. {[]byte(dots + dots + dots), " ", false},
  1523. }
  1524. func TestContainsAny(t *testing.T) {
  1525. for _, ct := range ContainsAnyTests {
  1526. if ContainsAny(ct.b, ct.substr) != ct.expected {
  1527. t.Errorf("ContainsAny(%s, %s) = %v, want %v",
  1528. ct.b, ct.substr, !ct.expected, ct.expected)
  1529. }
  1530. }
  1531. }
  1532. var ContainsRuneTests = []struct {
  1533. b []byte
  1534. r rune
  1535. expected bool
  1536. }{
  1537. {[]byte(""), 'a', false},
  1538. {[]byte("a"), 'a', true},
  1539. {[]byte("aaa"), 'a', true},
  1540. {[]byte("abc"), 'y', false},
  1541. {[]byte("abc"), 'c', true},
  1542. {[]byte("a☺b☻c☹d"), 'x', false},
  1543. {[]byte("a☺b☻c☹d"), '☻', true},
  1544. {[]byte("aRegExp*"), '*', true},
  1545. }
  1546. func TestContainsRune(t *testing.T) {
  1547. for _, ct := range ContainsRuneTests {
  1548. if ContainsRune(ct.b, ct.r) != ct.expected {
  1549. t.Errorf("ContainsRune(%q, %q) = %v, want %v",
  1550. ct.b, ct.r, !ct.expected, ct.expected)
  1551. }
  1552. }
  1553. }
  1554. var makeFieldsInput = func() []byte {
  1555. x := make([]byte, 1<<20)
  1556. // Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.
  1557. for i := range x {
  1558. switch rand.Intn(10) {
  1559. case 0:
  1560. x[i] = ' '
  1561. case 1:
  1562. if i > 0 && x[i-1] == 'x' {
  1563. copy(x[i-1:], "χ")
  1564. break
  1565. }
  1566. fallthrough
  1567. default:
  1568. x[i] = 'x'
  1569. }
  1570. }
  1571. return x
  1572. }
  1573. var makeFieldsInputASCII = func() []byte {
  1574. x := make([]byte, 1<<20)
  1575. // Input is ~10% space, rest ASCII non-space.
  1576. for i := range x {
  1577. if rand.Intn(10) == 0 {
  1578. x[i] = ' '
  1579. } else {
  1580. x[i] = 'x'
  1581. }
  1582. }
  1583. return x
  1584. }
  1585. var bytesdata = []struct {
  1586. name string
  1587. data []byte
  1588. }{
  1589. {"ASCII", makeFieldsInputASCII()},
  1590. {"Mixed", makeFieldsInput()},
  1591. }
  1592. func BenchmarkFields(b *testing.B) {
  1593. for _, sd := range bytesdata {
  1594. b.Run(sd.name, func(b *testing.B) {
  1595. for j := 1 << 4; j <= 1<<20; j <<= 4 {
  1596. b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
  1597. b.ReportAllocs()
  1598. b.SetBytes(int64(j))
  1599. data := sd.data[:j]
  1600. for i := 0; i < b.N; i++ {
  1601. Fields(data)
  1602. }
  1603. })
  1604. }
  1605. })
  1606. }
  1607. }
  1608. func BenchmarkFieldsFunc(b *testing.B) {
  1609. for _, sd := range bytesdata {
  1610. b.Run(sd.name, func(b *testing.B) {
  1611. for j := 1 << 4; j <= 1<<20; j <<= 4 {
  1612. b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
  1613. b.ReportAllocs()
  1614. b.SetBytes(int64(j))
  1615. data := sd.data[:j]
  1616. for i := 0; i < b.N; i++ {
  1617. FieldsFunc(data, unicode.IsSpace)
  1618. }
  1619. })
  1620. }
  1621. })
  1622. }
  1623. }
  1624. func BenchmarkTrimSpace(b *testing.B) {
  1625. tests := []struct {
  1626. name string
  1627. input []byte
  1628. }{
  1629. {"NoTrim", []byte("typical")},
  1630. {"ASCII", []byte(" foo bar ")},
  1631. {"SomeNonASCII", []byte(" \u2000\t\r\n x\t\t\r\r\ny\n \u3000 ")},
  1632. {"JustNonASCII", []byte("\u2000\u2000\u2000☺☺☺☺\u3000\u3000\u3000")},
  1633. }
  1634. for _, test := range tests {
  1635. b.Run(test.name, func(b *testing.B) {
  1636. for i := 0; i < b.N; i++ {
  1637. TrimSpace(test.input)
  1638. }
  1639. })
  1640. }
  1641. }
  1642. func BenchmarkToValidUTF8(b *testing.B) {
  1643. tests := []struct {
  1644. name string
  1645. input []byte
  1646. }{
  1647. {"Valid", []byte("typical")},
  1648. {"InvalidASCII", []byte("foo\xffbar")},
  1649. {"InvalidNonASCII", []byte("日本語\xff日本語")},
  1650. }
  1651. replacement := []byte("\uFFFD")
  1652. b.ResetTimer()
  1653. for _, test := range tests {
  1654. b.Run(test.name, func(b *testing.B) {
  1655. for i := 0; i < b.N; i++ {
  1656. ToValidUTF8(test.input, replacement)
  1657. }
  1658. })
  1659. }
  1660. }
  1661. func makeBenchInputHard() []byte {
  1662. tokens := [...]string{
  1663. "<a>", "<p>", "<b>", "<strong>",
  1664. "</a>", "</p>", "</b>", "</strong>",
  1665. "hello", "world",
  1666. }
  1667. x := make([]byte, 0, 1<<20)
  1668. for {
  1669. i := rand.Intn(len(tokens))
  1670. if len(x)+len(tokens[i]) >= 1<<20 {
  1671. break
  1672. }
  1673. x = append(x, tokens[i]...)
  1674. }
  1675. return x
  1676. }
  1677. var benchInputHard = makeBenchInputHard()
  1678. func benchmarkIndexHard(b *testing.B, sep []byte) {
  1679. for i := 0; i < b.N; i++ {
  1680. Index(benchInputHard, sep)
  1681. }
  1682. }
  1683. func benchmarkLastIndexHard(b *testing.B, sep []byte) {
  1684. for i := 0; i < b.N; i++ {
  1685. LastIndex(benchInputHard, sep)
  1686. }
  1687. }
  1688. func benchmarkCountHard(b *testing.B, sep []byte) {
  1689. for i := 0; i < b.N; i++ {
  1690. Count(benchInputHard, sep)
  1691. }
  1692. }
  1693. func BenchmarkIndexHard1(b *testing.B) { benchmarkIndexHard(b, []byte("<>")) }
  1694. func BenchmarkIndexHard2(b *testing.B) { benchmarkIndexHard(b, []byte("</pre>")) }
  1695. func BenchmarkIndexHard3(b *testing.B) { benchmarkIndexHard(b, []byte("<b>hello world</b>")) }
  1696. func BenchmarkIndexHard4(b *testing.B) {
  1697. benchmarkIndexHard(b, []byte("<pre><b>hello</b><strong>world</strong></pre>"))
  1698. }
  1699. func BenchmarkLastIndexHard1(b *testing.B) { benchmarkLastIndexHard(b, []byte("<>")) }
  1700. func BenchmarkLastIndexHard2(b *testing.B) { benchmarkLastIndexHard(b, []byte("</pre>")) }
  1701. func BenchmarkLastIndexHard3(b *testing.B) { benchmarkLastIndexHard(b, []byte("<b>hello world</b>")) }
  1702. func BenchmarkCountHard1(b *testing.B) { benchmarkCountHard(b, []byte("<>")) }
  1703. func BenchmarkCountHard2(b *testing.B) { benchmarkCountHard(b, []byte("</pre>")) }
  1704. func BenchmarkCountHard3(b *testing.B) { benchmarkCountHard(b, []byte("<b>hello world</b>")) }
  1705. func BenchmarkSplitEmptySeparator(b *testing.B) {
  1706. for i := 0; i < b.N; i++ {
  1707. Split(benchInputHard, nil)
  1708. }
  1709. }
  1710. func BenchmarkSplitSingleByteSeparator(b *testing.B) {
  1711. sep := []byte("/")
  1712. for i := 0; i < b.N; i++ {
  1713. Split(benchInputHard, sep)
  1714. }
  1715. }
  1716. func BenchmarkSplitMultiByteSeparator(b *testing.B) {
  1717. sep := []byte("hello")
  1718. for i := 0; i < b.N; i++ {
  1719. Split(benchInputHard, sep)
  1720. }
  1721. }
  1722. func BenchmarkSplitNSingleByteSeparator(b *testing.B) {
  1723. sep := []byte("/")
  1724. for i := 0; i < b.N; i++ {
  1725. SplitN(benchInputHard, sep, 10)
  1726. }
  1727. }
  1728. func BenchmarkSplitNMultiByteSeparator(b *testing.B) {
  1729. sep := []byte("hello")
  1730. for i := 0; i < b.N; i++ {
  1731. SplitN(benchInputHard, sep, 10)
  1732. }
  1733. }
  1734. func BenchmarkRepeat(b *testing.B) {
  1735. for i := 0; i < b.N; i++ {
  1736. Repeat([]byte("-"), 80)
  1737. }
  1738. }
  1739. func BenchmarkBytesCompare(b *testing.B) {
  1740. for n := 1; n <= 2048; n <<= 1 {
  1741. b.Run(fmt.Sprint(n), func(b *testing.B) {
  1742. var x = make([]byte, n)
  1743. var y = make([]byte, n)
  1744. for i := 0; i < n; i++ {
  1745. x[i] = 'a'
  1746. }
  1747. for i := 0; i < n; i++ {
  1748. y[i] = 'a'
  1749. }
  1750. b.ResetTimer()
  1751. for i := 0; i < b.N; i++ {
  1752. Compare(x, y)
  1753. }
  1754. })
  1755. }
  1756. }
  1757. func BenchmarkIndexAnyASCII(b *testing.B) {
  1758. x := Repeat([]byte{'#'}, 2048) // Never matches set
  1759. cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz"
  1760. for k := 1; k <= 2048; k <<= 4 {
  1761. for j := 1; j <= 64; j <<= 1 {
  1762. b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
  1763. for i := 0; i < b.N; i++ {
  1764. IndexAny(x[:k], cs[:j])
  1765. }
  1766. })
  1767. }
  1768. }
  1769. }
  1770. func BenchmarkIndexAnyUTF8(b *testing.B) {
  1771. x := Repeat([]byte{'#'}, 2048) // Never matches set
  1772. cs := "你好世界, hello world. 你好世界, hello world. 你好世界, hello world."
  1773. for k := 1; k <= 2048; k <<= 4 {
  1774. for j := 1; j <= 64; j <<= 1 {
  1775. b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
  1776. for i := 0; i < b.N; i++ {
  1777. IndexAny(x[:k], cs[:j])
  1778. }
  1779. })
  1780. }
  1781. }
  1782. }
  1783. func BenchmarkLastIndexAnyASCII(b *testing.B) {
  1784. x := Repeat([]byte{'#'}, 2048) // Never matches set
  1785. cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz"
  1786. for k := 1; k <= 2048; k <<= 4 {
  1787. for j := 1; j <= 64; j <<= 1 {
  1788. b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
  1789. for i := 0; i < b.N; i++ {
  1790. LastIndexAny(x[:k], cs[:j])
  1791. }
  1792. })
  1793. }
  1794. }
  1795. }
  1796. func BenchmarkLastIndexAnyUTF8(b *testing.B) {
  1797. x := Repeat([]byte{'#'}, 2048) // Never matches set
  1798. cs := "你好世界, hello world. 你好世界, hello world. 你好世界, hello world."
  1799. for k := 1; k <= 2048; k <<= 4 {
  1800. for j := 1; j <= 64; j <<= 1 {
  1801. b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
  1802. for i := 0; i < b.N; i++ {
  1803. LastIndexAny(x[:k], cs[:j])
  1804. }
  1805. })
  1806. }
  1807. }
  1808. }
  1809. func BenchmarkTrimASCII(b *testing.B) {
  1810. cs := "0123456789abcdef"
  1811. for k := 1; k <= 4096; k <<= 4 {
  1812. for j := 1; j <= 16; j <<= 1 {
  1813. b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
  1814. x := Repeat([]byte(cs[:j]), k) // Always matches set
  1815. for i := 0; i < b.N; i++ {
  1816. Trim(x[:k], cs[:j])
  1817. }
  1818. })
  1819. }
  1820. }
  1821. }
  1822. func BenchmarkTrimByte(b *testing.B) {
  1823. x := []byte(" the quick brown fox ")
  1824. for i := 0; i < b.N; i++ {
  1825. Trim(x, " ")
  1826. }
  1827. }
  1828. func BenchmarkIndexPeriodic(b *testing.B) {
  1829. key := []byte{1, 1}
  1830. for _, skip := range [...]int{2, 4, 8, 16, 32, 64} {
  1831. b.Run(fmt.Sprintf("IndexPeriodic%d", skip), func(b *testing.B) {
  1832. buf := make([]byte, 1<<16)
  1833. for i := 0; i < len(buf); i += skip {
  1834. buf[i] = 1
  1835. }
  1836. for i := 0; i < b.N; i++ {
  1837. Index(buf, key)
  1838. }
  1839. })
  1840. }
  1841. }