test_util.py 106 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914
  1. # Copyright (c) 2019 - 2025, Ilan Schnell; All Rights Reserved
  2. # bitarray is published under the PSF license.
  3. #
  4. # Author: Ilan Schnell
  5. """
  6. Tests for bitarray.util module
  7. """
  8. import os
  9. import sys
  10. import math
  11. import array
  12. import base64
  13. import binascii
  14. import operator
  15. import struct
  16. import shutil
  17. import tempfile
  18. import unittest
  19. from io import StringIO
  20. from functools import reduce
  21. from random import (choice, choices, getrandbits, randrange, randint, random,
  22. sample, seed)
  23. from string import hexdigits, whitespace
  24. from collections import Counter
  25. from bitarray import (bitarray, frozenbitarray, decodetree, bits2bytes,
  26. _set_default_endian)
  27. from bitarray.test_bitarray import Util, skipIf, is_pypy, urandom_2, PTRSIZE
  28. from bitarray.util import (
  29. zeros, ones, urandom, random_k, random_p, pprint, strip, count_n,
  30. parity, gen_primes, sum_indices, xor_indices,
  31. count_and, count_or, count_xor, any_and, subset,
  32. correspond_all, byteswap, intervals,
  33. serialize, deserialize, ba2hex, hex2ba, ba2base, base2ba,
  34. ba2int, int2ba,
  35. sc_encode, sc_decode, vl_encode, vl_decode,
  36. _huffman_tree, huffman_code, canonical_huffman, canonical_decode,
  37. )
  38. from bitarray.util import _Random, _ssqi # type: ignore
  39. # --------------------------- zeros() ones() -----------------------------
  40. class ZerosOnesTests(unittest.TestCase):
  41. def test_basic(self):
  42. for _ in range(50):
  43. default_endian = choice(['little', 'big'])
  44. _set_default_endian(default_endian)
  45. a = choice([zeros(0), zeros(0, None), zeros(0, endian=None),
  46. ones(0), ones(0, None), ones(0, endian=None)])
  47. self.assertEqual(a, bitarray())
  48. self.assertEqual(a.endian, default_endian)
  49. self.assertEqual(type(a), bitarray)
  50. endian = choice(['little', 'big', None])
  51. n = randrange(100)
  52. a = choice([zeros(n, endian), zeros(n, endian=endian)])
  53. self.assertEqual(a.to01(), n * "0")
  54. self.assertEqual(a.endian, endian or default_endian)
  55. b = choice([ones(n, endian), ones(n, endian=endian)])
  56. self.assertEqual(b.to01(), n * "1")
  57. self.assertEqual(b.endian, endian or default_endian)
  58. def test_errors(self):
  59. for f in zeros, ones:
  60. self.assertRaises(TypeError, f) # no argument
  61. self.assertRaises(TypeError, f, '')
  62. self.assertRaises(TypeError, f, bitarray())
  63. self.assertRaises(TypeError, f, [])
  64. self.assertRaises(TypeError, f, 1.0)
  65. self.assertRaises(ValueError, f, -1)
  66. # endian not string
  67. for x in 0, 1, {}, [], False, True:
  68. self.assertRaises(TypeError, f, 0, x)
  69. # endian wrong string
  70. self.assertRaises(ValueError, f, 0, 'foo')
  71. # ----------------------------- urandom() ---------------------------------
  72. class URandomTests(unittest.TestCase):
  73. def test_basic(self):
  74. for _ in range(20):
  75. default_endian = choice(['little', 'big'])
  76. _set_default_endian(default_endian)
  77. a = choice([urandom(0), urandom(0, endian=None)])
  78. self.assertEqual(a, bitarray())
  79. self.assertEqual(a.endian, default_endian)
  80. endian = choice(['little', 'big', None])
  81. n = randrange(100)
  82. a = choice([urandom(n, endian), urandom(n, endian=endian)])
  83. self.assertEqual(len(a), n)
  84. self.assertEqual(a.endian, endian or default_endian)
  85. self.assertEqual(type(a), bitarray)
  86. def test_errors(self):
  87. U = urandom
  88. self.assertRaises(TypeError, U)
  89. self.assertRaises(TypeError, U, '')
  90. self.assertRaises(TypeError, U, bitarray())
  91. self.assertRaises(TypeError, U, [])
  92. self.assertRaises(TypeError, U, 1.0)
  93. self.assertRaises(ValueError, U, -1)
  94. self.assertRaises(TypeError, U, 0, 1)
  95. self.assertRaises(ValueError, U, 0, 'foo')
  96. def test_count(self):
  97. a = urandom(10_000_000)
  98. # see if population is within expectation
  99. self.assertTrue(abs(a.count() - 5_000_000) <= 15_811)
  100. # ---------------------------- random_k() ---------------------------------
  101. class Random_K_Tests(unittest.TestCase):
  102. def test_basic(self):
  103. for _ in range(250):
  104. default_endian = choice(['little', 'big'])
  105. _set_default_endian(default_endian)
  106. endian = choice(['little', 'big', None])
  107. n = randrange(120)
  108. k = randint(0, n)
  109. a = random_k(n, k, endian)
  110. self.assertTrue(type(a), bitarray)
  111. self.assertEqual(len(a), n)
  112. self.assertEqual(a.count(), k)
  113. self.assertEqual(a.endian, endian or default_endian)
  114. def test_inputs_and_edge_cases(self):
  115. R = random_k
  116. self.assertRaises(TypeError, R)
  117. self.assertRaises(TypeError, R, 4)
  118. self.assertRaises(TypeError, R, 1, "0.5")
  119. self.assertRaises(TypeError, R, 1, p=1)
  120. self.assertRaises(TypeError, R, 11, 5.5) # see issue #239
  121. self.assertRaises(ValueError, R, -1, 0)
  122. for k in -1, 11: # k is not 0 <= k <= n
  123. self.assertRaises(ValueError, R, 10, k)
  124. self.assertRaises(ValueError, R, 10, 7, 'foo')
  125. self.assertRaises(ValueError, R, 10, 7, endian='foo')
  126. for n in range(20):
  127. self.assertEqual(R(n, k=0), zeros(n))
  128. self.assertEqual(R(n, k=n), ones(n))
  129. def test_count(self):
  130. for n in range(10): # test explicitly for small n
  131. for k in range(n + 1):
  132. a = random_k(n, k)
  133. self.assertEqual(len(a), n)
  134. self.assertEqual(a.count(), k)
  135. for _ in range(100):
  136. n = randrange(10_000)
  137. k = randint(0, n)
  138. a = random_k(n, k)
  139. self.assertEqual(len(a), n)
  140. self.assertEqual(a.count(), k)
  141. def test_active_bits(self):
  142. # test if all bits are active
  143. n = 240
  144. cum = zeros(n)
  145. for _ in range(1000):
  146. k = randint(30, 40)
  147. a = random_k(n, k)
  148. self.assertEqual(a.count(), k)
  149. cum |= a
  150. if cum.all():
  151. break
  152. else:
  153. self.fail()
  154. # test uses math.comb, added in 3.8
  155. @skipIf(sys.version_info[:2] < (3, 8))
  156. def test_combinations(self):
  157. # for entire range of 0 <= k <= n, validate that random_k()
  158. # generates all possible combinations
  159. n = 7
  160. total = 0
  161. for k in range(n + 1):
  162. expected = math.comb(n, k)
  163. combs = set()
  164. for _ in range(10_000):
  165. combs.add(frozenbitarray(random_k(n, k)))
  166. if len(combs) == expected:
  167. total += expected
  168. break
  169. else:
  170. self.fail()
  171. self.assertEqual(total, 2 ** n)
  172. def collect_code_branches(self):
  173. # return list of bitarrays from all code branches of random_k()
  174. res = []
  175. # test small k (no .combine_half())
  176. res.append(random_k(300, 10))
  177. # general cases
  178. for k in 100, 500, 2_500, 4_000:
  179. res.append(random_k(5_000, k))
  180. return res
  181. def test_seed(self):
  182. # We ensure that after setting a seed value, random_k() will
  183. # always return the same random bitarrays. However, we do not ensure
  184. # that these results will not change in future versions of bitarray.
  185. _set_default_endian("little")
  186. a = []
  187. for val in 654321, 654322, 654321, 654322:
  188. seed(val)
  189. a.append(self.collect_code_branches())
  190. self.assertEqual(a[0], a[2])
  191. self.assertEqual(a[1], a[3])
  192. for item0, item1 in zip(a[0], a[1]):
  193. self.assertNotEqual(item0, item1)
  194. # initialize seed with current system time again
  195. seed()
  196. # ---------------- tests for internal _Random methods -------------------
  197. def test_op_seq(self):
  198. r = _Random()
  199. G = r.op_seq
  200. K = r.K
  201. M = r.M
  202. # special cases
  203. self.assertRaises(ValueError, G, 0)
  204. self.assertEqual(G(1), zeros(M - 1))
  205. self.assertEqual(G(K // 2), bitarray())
  206. self.assertEqual(G(K - 1), ones(M - 1))
  207. self.assertRaises(ValueError, G, K)
  208. # examples
  209. for p, s in [
  210. (0.15625, '0100'),
  211. (0.25, '0'), # 1/2 AND -> 1/4
  212. (0.375, '10'), # 1/2 OR -> 3/4 AND -> 3/8
  213. (0.5, ''),
  214. (0.625, '01'), # 1/2 AND -> 1/4 OR -> 5/8
  215. (0.6875, '101'),
  216. (0.75, '1'), # 1/2 OR -> 3/4
  217. ]:
  218. seq = G(int(p * K))
  219. self.assertEqual(seq.to01(), s)
  220. for i in range(1, K):
  221. seq = G(i)
  222. self.assertTrue(0 <= len(s) < M)
  223. q = 0.5 # a = random_half()
  224. for k in seq:
  225. # k=0: AND k=1: OR
  226. if k:
  227. q += 0.5 * (1.0 - q) # a |= random_half()
  228. else:
  229. q *= 0.5 # a &= random_half()
  230. self.assertEqual(q, i / K)
  231. def test_combine_half(self):
  232. r = _Random(1_000_000)
  233. for seq, mean in [
  234. ([], 500_000), # .random_half() itself
  235. ([0], 250_000), # AND
  236. ([1], 750_000), # OR
  237. ([1, 0], 375_000), # OR followed by AND
  238. ]:
  239. a = r.combine_half(seq)
  240. self.assertTrue(abs(a.count() - mean) < 5_000)
  241. # ---------------------------- random_p() ---------------------------------
  242. HAVE_BINOMIALVARIATE = sys.version_info[:2] >= (3, 12)
  243. @skipIf(HAVE_BINOMIALVARIATE)
  244. class Random_P_Not_Implemented(unittest.TestCase):
  245. def test_not_implemented(self):
  246. self.assertRaises(NotImplementedError, random_p, 100, 0.25)
  247. @skipIf(not HAVE_BINOMIALVARIATE)
  248. class Random_P_Tests(unittest.TestCase):
  249. def test_basic(self):
  250. for _ in range(250):
  251. default_endian = choice(['little', 'big'])
  252. _set_default_endian(default_endian)
  253. endian = choice(['little', 'big', None])
  254. n = randrange(120)
  255. p = choice([0.0, 0.0001, 0.2, 0.5, 0.9, 1.0])
  256. a = random_p(n, p, endian)
  257. self.assertTrue(type(a), bitarray)
  258. self.assertEqual(len(a), n)
  259. self.assertEqual(a.endian, endian or default_endian)
  260. def test_inputs_and_edge_cases(self):
  261. R = random_p
  262. self.assertRaises(TypeError, R)
  263. self.assertRaises(TypeError, R, 0.25)
  264. self.assertRaises(TypeError, R, 1, "0.5")
  265. self.assertRaises(ValueError, R, -1)
  266. self.assertRaises(ValueError, R, 1, -0.5)
  267. self.assertRaises(ValueError, R, 1, p=1.5)
  268. self.assertRaises(ValueError, R, 1, 0.15, 'foo')
  269. self.assertRaises(ValueError, R, 10, 0.5, endian='foo')
  270. self.assertEqual(R(0), bitarray())
  271. for n in range(20):
  272. self.assertEqual(R(n, 0), zeros(n))
  273. self.assertEqual(len(R(n, 0.5)), n)
  274. self.assertEqual(R(n, p=1), ones(n))
  275. def test_default(self):
  276. a = random_p(10_000_000) # p defaults to 0.5
  277. # see if population is within expectation
  278. self.assertTrue(abs(a.count() - 5_000_000) <= 15_811)
  279. def test_count(self):
  280. for _ in range(500):
  281. n = choice([randrange(4, 120), randrange(100, 1000)])
  282. p = choice([0.0001, 0.001, 0.01, 0.1, 0.25, 0.5, 0.9])
  283. sigma = math.sqrt(n * p * (1.0 - p))
  284. a = random_p(n, p)
  285. self.assertEqual(len(a), n)
  286. self.assertTrue(abs(a.count() - n * p) < max(4, 10 * sigma))
  287. def collect_code_branches(self):
  288. # return list of bitarrays from all code branches of random_p()
  289. res = []
  290. # for default p=0.5, random_p uses getrandbits
  291. res.append(random_p(32))
  292. # test small p
  293. res.append(random_p(5_000, 0.002))
  294. # small n (note that p=0.4 will call the "literal definition" case)
  295. res.append(random_p(15, 0.4))
  296. # general cases
  297. for p in 0.1, 0.2, 0.375, 0.4999, 0.7:
  298. res.append(random_p(150, p))
  299. return res
  300. def test_seed(self):
  301. # We ensure that after setting a seed value, random_p() will always
  302. # return the same random bitarrays. However, we do not ensure that
  303. # these results will not change in future versions of bitarray.
  304. _set_default_endian("little")
  305. a = []
  306. for val in 123456, 123457, 123456, 123457:
  307. seed(val)
  308. a.append(self.collect_code_branches())
  309. self.assertEqual(a[0], a[2])
  310. self.assertEqual(a[1], a[3])
  311. for item0, item1 in zip(a[0], a[1]):
  312. self.assertNotEqual(item0, item1)
  313. # initialize seed with current system time again
  314. seed()
  315. def test_small_p_limit(self):
  316. # For understanding how the algorithm works, see ./doc/random_p.rst
  317. # Also, see VerificationTests in devel/test_random.py
  318. r = _Random()
  319. limit = 1.0 / (r.K + 1) # lower limit for p
  320. self.assertTrue(r.SMALL_P > limit)
  321. # ---------------------------- gen_primes() -------------------------------
  322. class PrimeTests(unittest.TestCase):
  323. primes = [
  324. 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61,
  325. 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137,
  326. 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
  327. 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283,
  328. 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379,
  329. 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461,
  330. ]
  331. def test_errors(self):
  332. P = gen_primes
  333. self.assertRaises(TypeError, P, 3, 1)
  334. self.assertRaises(ValueError, P, "1.0")
  335. self.assertRaises(ValueError, P, -1)
  336. self.assertRaises(TypeError, P, 8, 4)
  337. self.assertRaises(TypeError, P, 8, foo="big")
  338. self.assertRaises(ValueError, P, 8, "foo")
  339. self.assertRaises(ValueError, P, 8, endian="foo")
  340. def test_explitcit(self):
  341. for n in range(230):
  342. default_endian = choice(['little', 'big'])
  343. _set_default_endian(default_endian)
  344. endian = choice(["little", "big", None])
  345. odd = getrandbits(1)
  346. a = gen_primes(n, endian, odd)
  347. self.assertEqual(len(a), n)
  348. self.assertEqual(a.endian, endian or default_endian)
  349. if odd:
  350. lst = [2] + [2 * i + 1 for i in a.search(1)]
  351. else:
  352. lst = [i for i in a.search(1)]
  353. self.assertEqual(lst, self.primes[:len(lst)])
  354. def test_cmp(self):
  355. N = 10_000
  356. c = ones(N)
  357. c[:2] = 0
  358. for i in range(int(math.sqrt(N) + 1.0)):
  359. if c[i]:
  360. c[i * i :: i] = 0
  361. self.assertEqual(list(c.search(1, 0, 462)), self.primes)
  362. for _ in range(20):
  363. n = randrange(N)
  364. endian = choice(["little", "big"])
  365. a = gen_primes(n, endian=endian)
  366. self.assertEqual(a, c[:n])
  367. self.assertEqual(a.endian, endian)
  368. b = gen_primes(n // 2, endian, odd=True)
  369. self.assertEqual(b, a[1::2])
  370. self.assertEqual(b, c[1:n:2])
  371. for _ in range(20):
  372. i = randrange(10, 100)
  373. x = randint(-1, 1)
  374. n = i * i + x
  375. self.assertEqual(gen_primes(n), c[:n])
  376. self.assertEqual(gen_primes(n // 2, odd=1), c[1:n:2])
  377. self.assertEqual(gen_primes(N), c)
  378. self.assertEqual(gen_primes(N // 2, odd=1), c[1::2])
  379. def test_count(self):
  380. for n, count, sum_p, sum_sqr_p in [
  381. ( 10, 4, 17, 87),
  382. ( 100, 25, 1_060, 65_796),
  383. ( 1_000, 168, 76_127, 49_345_379),
  384. (10_000, 1229, 5_736_396, 37_546_387_960),
  385. ]:
  386. a = gen_primes(n)
  387. self.assertEqual(len(a), n)
  388. self.assertEqual(a.count(), count)
  389. self.assertEqual(sum_indices(a), sum_p)
  390. self.assertEqual(sum_indices(a, 2), sum_sqr_p)
  391. b = gen_primes(n // 2, odd=1)
  392. self.assertEqual(len(b), n // 2)
  393. self.assertEqual(b.count() + 1, count) # +1 because of prime 2
  394. self.assertEqual(b, a[1::2])
  395. # ----------------------------- pprint() ----------------------------------
  396. class PPrintTests(unittest.TestCase):
  397. @staticmethod
  398. def get_code_string(a):
  399. f = StringIO()
  400. pprint(a, stream=f)
  401. return f.getvalue()
  402. def round_trip(self, a):
  403. b = eval(self.get_code_string(a))
  404. self.assertEqual(b, a)
  405. self.assertEqual(type(b), type(a))
  406. def test_bitarray(self):
  407. a = bitarray('110')
  408. self.assertEqual(self.get_code_string(a), "bitarray('110')\n")
  409. self.round_trip(a)
  410. def test_frozenbitarray(self):
  411. a = frozenbitarray('01')
  412. self.assertEqual(self.get_code_string(a), "frozenbitarray('01')\n")
  413. self.round_trip(a)
  414. def test_formatting(self):
  415. a = bitarray(200)
  416. for width in range(40, 130, 10):
  417. for n in range(1, 10):
  418. f = StringIO()
  419. pprint(a, stream=f, group=n, width=width)
  420. r = f.getvalue()
  421. self.assertEqual(eval(r), a)
  422. s = r.strip("bitary(')\n")
  423. for group in s.split()[:-1]:
  424. self.assertEqual(len(group), n)
  425. for line in s.split('\n'):
  426. self.assertTrue(len(line) < width)
  427. def test_fallback(self):
  428. for a in None, 'asd', [1, 2], bitarray(), frozenbitarray('1'):
  429. self.round_trip(a)
  430. def test_subclass(self):
  431. class Foo(bitarray):
  432. pass
  433. a = Foo()
  434. code = self.get_code_string(a)
  435. self.assertEqual(code, "Foo()\n")
  436. b = eval(code)
  437. self.assertEqual(b, a)
  438. self.assertEqual(type(b), type(a))
  439. def test_random(self):
  440. for n in range(150):
  441. self.round_trip(urandom(n))
  442. def test_file(self):
  443. tmpdir = tempfile.mkdtemp()
  444. tmpfile = os.path.join(tmpdir, 'testfile')
  445. a = urandom_2(1000)
  446. try:
  447. with open(tmpfile, 'w') as fo:
  448. pprint(a, fo)
  449. with open(tmpfile, 'r') as fi:
  450. b = eval(fi.read())
  451. self.assertEqual(a, b)
  452. finally:
  453. shutil.rmtree(tmpdir)
  454. # ----------------------------- strip() -----------------------------------
  455. class StripTests(unittest.TestCase, Util):
  456. def test_simple(self):
  457. self.assertRaises(TypeError, strip, '0110')
  458. self.assertRaises(TypeError, strip, bitarray(), 123)
  459. self.assertRaises(ValueError, strip, bitarray(), 'up')
  460. for default_endian in 'big', 'little':
  461. _set_default_endian(default_endian)
  462. a = bitarray('00010110000')
  463. self.assertEQUAL(strip(a), bitarray('0001011'))
  464. self.assertEQUAL(strip(a, 'left'), bitarray('10110000'))
  465. self.assertEQUAL(strip(a, 'both'), bitarray('1011'))
  466. b = frozenbitarray('00010110000')
  467. c = strip(b, 'both')
  468. self.assertEqual(c, bitarray('1011'))
  469. self.assertEqual(type(c), frozenbitarray)
  470. def test_zeros_ones(self):
  471. for _ in range(50):
  472. n = randrange(10)
  473. mode = choice(['left', 'right', 'both'])
  474. a = zeros(n)
  475. c = strip(a, mode)
  476. self.assertEqual(type(c), bitarray)
  477. self.assertEqual(len(c), 0)
  478. self.assertEqual(a, zeros(n))
  479. b = frozenbitarray(a)
  480. c = strip(b, mode)
  481. self.assertEqual(type(c), frozenbitarray)
  482. self.assertEqual(len(c), 0)
  483. a.setall(1)
  484. c = strip(a, mode)
  485. self.assertEqual(c, ones(n))
  486. def test_random(self):
  487. for a in self.randombitarrays():
  488. b = a.copy()
  489. f = frozenbitarray(a)
  490. s = a.to01()
  491. for mode, res in [
  492. ('left', bitarray(s.lstrip('0'), a.endian)),
  493. ('right', bitarray(s.rstrip('0'), a.endian)),
  494. ('both', bitarray(s.strip('0'), a.endian)),
  495. ]:
  496. c = strip(a, mode)
  497. self.assertEQUAL(c, res)
  498. self.assertEqual(type(c), bitarray)
  499. self.assertEQUAL(a, b)
  500. c = strip(f, mode)
  501. self.assertEQUAL(c, res)
  502. self.assertEqual(type(c), frozenbitarray)
  503. self.assertEQUAL(f, b)
  504. def test_one_set(self):
  505. for _ in range(10):
  506. n = randint(1, 10000)
  507. a = bitarray(n)
  508. a.setall(0)
  509. a[randrange(n)] = 1
  510. self.assertEqual(strip(a, 'both'), bitarray('1'))
  511. self.assertEqual(len(a), n)
  512. # ----------------------------- count_n() ---------------------------------
  513. class CountN_Tests(unittest.TestCase, Util):
  514. @staticmethod
  515. def count_n(a, n):
  516. "return lowest index i for which a[:i].count() == n"
  517. i, j = n, a.count(1, 0, n)
  518. while j < n:
  519. j += a[i]
  520. i += 1
  521. return i
  522. def check_result(self, a, n, i, v=1):
  523. self.assertEqual(a.count(v, 0, i), n)
  524. if i == 0:
  525. self.assertEqual(n, 0)
  526. else:
  527. self.assertEqual(a[i - 1], v)
  528. def test_empty(self):
  529. a = bitarray()
  530. self.assertEqual(count_n(a, 0), 0)
  531. self.assertEqual(count_n(a, 0, 0), 0)
  532. self.assertEqual(count_n(a, 0, 1), 0)
  533. self.assertRaises(ValueError, count_n, a, 1)
  534. self.assertRaises(TypeError, count_n, '', 0)
  535. self.assertRaises(TypeError, count_n, a, 7.0)
  536. self.assertRaises(ValueError, count_n, a, 0, 2)
  537. self.assertRaisesMessage(ValueError, "n = 1 larger than bitarray "
  538. "length 0", count_n, a, 1)
  539. def test_simple(self):
  540. a = bitarray('111110111110111110111110011110111110111110111000')
  541. b = a.copy()
  542. self.assertEqual(len(a), 48)
  543. self.assertEqual(a.count(), 37)
  544. self.assertEqual(a.count(0), 11)
  545. self.assertEqual(count_n(a, 0), 0)
  546. self.assertEqual(count_n(a, 0, 0), 0)
  547. self.assertEqual(count_n(a, 2, 0), 12)
  548. self.assertEqual(count_n(a, 10, 0), 47)
  549. self.assertEqual(count_n(a, 20), 23)
  550. self.assertEqual(count_n(a, 20, 1), 23)
  551. self.assertEqual(count_n(a, 37), 45)
  552. # n < 0
  553. self.assertRaisesMessage(ValueError, "non-negative integer expected",
  554. count_n, a, -1)
  555. # n > len(a)
  556. self.assertRaisesMessage(ValueError, "n = 49 larger than bitarray "
  557. "length 48", count_n, a, 49)
  558. # n > a.count(0)
  559. self.assertRaisesMessage(ValueError, "n = 12 exceeds total count "
  560. "(a.count(0) = 11)", count_n, a, 12, 0)
  561. # n > a.count(1)
  562. self.assertRaisesMessage(ValueError, "n = 38 exceeds total count "
  563. "(a.count(1) = 37)", count_n, a, 38, 1)
  564. for v in 0, 1:
  565. for n in range(a.count(v) + 1):
  566. i = count_n(a, n, v)
  567. self.check_result(a, n, i, v)
  568. self.assertEqual(a[:i].count(v), n)
  569. self.assertEqual(i, self.count_n(a if v else ~a, n))
  570. self.assertEQUAL(a, b)
  571. def test_frozenbitarray(self):
  572. a = frozenbitarray('001111101111101111101111100111100')
  573. self.assertEqual(len(a), 33)
  574. self.assertEqual(a.count(), 24)
  575. self.assertEqual(count_n(a, 0), 0)
  576. self.assertEqual(count_n(a, 10), 13)
  577. self.assertEqual(count_n(a, 24), 31)
  578. self.assertRaises(ValueError, count_n, a, -1) # n < 0
  579. self.assertRaises(ValueError, count_n, a, 25) # n > a.count()
  580. self.assertRaises(ValueError, count_n, a, 34) # n > len(a)
  581. for n in range(25):
  582. self.check_result(a, n, count_n(a, n))
  583. def test_ones(self):
  584. n = randint(1, 100_000)
  585. a = ones(n)
  586. self.assertEqual(count_n(a, n), n)
  587. self.assertRaises(ValueError, count_n, a, 1, 0)
  588. self.assertRaises(ValueError, count_n, a, n + 1)
  589. for _ in range(20):
  590. i = randint(0, n)
  591. self.assertEqual(count_n(a, i), i)
  592. def test_one_set(self):
  593. n = randint(1, 100_000)
  594. a = zeros(n)
  595. self.assertEqual(count_n(a, 0), 0)
  596. self.assertRaises(ValueError, count_n, a, 1)
  597. for _ in range(20):
  598. a.setall(0)
  599. i = randrange(n)
  600. a[i] = 1
  601. self.assertEqual(count_n(a, 1), i + 1)
  602. self.assertRaises(ValueError, count_n, a, 2)
  603. def test_last(self):
  604. for N in range(1, 1000):
  605. a = zeros(N)
  606. a[-1] = 1
  607. self.assertEqual(count_n(a, 1), N)
  608. if N == 1:
  609. msg = "n = 2 larger than bitarray length 1"
  610. else:
  611. msg = "n = 2 exceeds total count (a.count(1) = 1)"
  612. self.assertRaisesMessage(ValueError, msg, count_n, a, 2)
  613. def test_primes(self):
  614. a = gen_primes(10_000)
  615. # there are 1229 primes below 10,000
  616. self.assertEqual(a.count(), 1229)
  617. for n, p in [( 10, 29), # the 10th prime number is 29
  618. ( 100, 541), # the 100th prime number is 541
  619. (1000, 7919)]: # the 1000th prime number is 7919
  620. self.assertEqual(count_n(a, n) - 1, p)
  621. def test_large(self):
  622. for _ in range(100):
  623. N = randint(100_000, 250_000)
  624. a = bitarray(N)
  625. v = getrandbits(1)
  626. a.setall(not v)
  627. for _ in range(randrange(100)):
  628. a[randrange(N)] = v
  629. tc = a.count(v) # total count
  630. i = count_n(a, tc, v)
  631. self.check_result(a, tc, i, v)
  632. n = tc + 1
  633. self.assertRaisesMessage(ValueError, "n = %d exceeds total count "
  634. "(a.count(%d) = %d)" % (n, v, tc),
  635. count_n, a, n, v)
  636. for _ in range(20):
  637. n = randint(0, tc)
  638. i = count_n(a, n, v)
  639. self.check_result(a, n, i, v)
  640. # ---------------------------------------------------------------------------
  641. class BitwiseCountTests(unittest.TestCase, Util):
  642. def test_count_byte(self):
  643. for i in range(256):
  644. a = bitarray(bytearray([i]))
  645. cnt = a.count()
  646. self.assertEqual(count_and(a, zeros(8)), 0)
  647. self.assertEqual(count_and(a, ones(8)), cnt)
  648. self.assertEqual(count_and(a, a), cnt)
  649. self.assertEqual(count_or(a, zeros(8)), cnt)
  650. self.assertEqual(count_or(a, ones(8)), 8)
  651. self.assertEqual(count_or(a, a), cnt)
  652. self.assertEqual(count_xor(a, zeros(8)), cnt)
  653. self.assertEqual(count_xor(a, ones(8)), 8 - cnt)
  654. self.assertEqual(count_xor(a, a), 0)
  655. def test_1(self):
  656. a = bitarray('001111')
  657. aa = a.copy()
  658. b = bitarray('010011')
  659. bb = b.copy()
  660. self.assertEqual(count_and(a, b), 2)
  661. self.assertEqual(count_or(a, b), 5)
  662. self.assertEqual(count_xor(a, b), 3)
  663. for f in count_and, count_or, count_xor:
  664. # not two arguments
  665. self.assertRaises(TypeError, f)
  666. self.assertRaises(TypeError, f, a)
  667. self.assertRaises(TypeError, f, a, b, 3)
  668. # wrong argument types
  669. self.assertRaises(TypeError, f, a, '')
  670. self.assertRaises(TypeError, f, '1', b)
  671. self.assertRaises(TypeError, f, a, 4)
  672. self.assertEQUAL(a, aa)
  673. self.assertEQUAL(b, bb)
  674. b.append(1)
  675. for f in count_and, count_or, count_xor:
  676. self.assertRaises(ValueError, f, a, b)
  677. self.assertRaises(ValueError, f,
  678. bitarray('110', 'big'),
  679. bitarray('101', 'little'))
  680. def test_frozen(self):
  681. a = frozenbitarray('001111')
  682. b = frozenbitarray('010011')
  683. self.assertEqual(count_and(a, b), 2)
  684. self.assertEqual(count_or(a, b), 5)
  685. self.assertEqual(count_xor(a, b), 3)
  686. def test_random(self):
  687. for _ in range(100):
  688. n = randrange(1000)
  689. a = urandom_2(n)
  690. b = urandom(n, a.endian)
  691. self.assertEqual(count_and(a, b), (a & b).count())
  692. self.assertEqual(count_or(a, b), (a | b).count())
  693. self.assertEqual(count_xor(a, b), (a ^ b).count())
  694. def test_misc(self):
  695. for a in self.randombitarrays():
  696. n = len(a)
  697. b = urandom(n, a.endian)
  698. # any and
  699. self.assertEqual(any(a & b), count_and(a, b) > 0)
  700. self.assertEqual(any_and(a, b), any(a & b))
  701. # any or
  702. self.assertEqual(any(a | b), count_or(a, b) > 0)
  703. self.assertEqual(any(a | b), any(a) or any(b))
  704. # any xor
  705. self.assertEqual(any(a ^ b), count_xor(a, b) > 0)
  706. self.assertEqual(any(a ^ b), a != b)
  707. # all and
  708. self.assertEqual(all(a & b), count_and(a, b) == n)
  709. self.assertEqual(all(a & b), all(a) and all(b))
  710. # all or
  711. self.assertEqual(all(a | b), count_or(a, b) == n)
  712. # all xor
  713. self.assertEqual(all(a ^ b), count_xor(a, b) == n)
  714. self.assertEqual(all(a ^ b), a == ~b)
  715. # --------------------------- any_and() -----------------------------------
  716. class BitwiseAnyTests(unittest.TestCase, Util):
  717. def test_basic(self):
  718. a = frozenbitarray('0101')
  719. b = bitarray('0111')
  720. self.assertTrue(any_and(a, b))
  721. self.assertRaises(TypeError, any_and)
  722. self.assertRaises(TypeError, any_and, a, 4)
  723. b.append(1)
  724. self.assertRaises(ValueError, any_and, a, b)
  725. self.assertRaises(ValueError, any_and,
  726. bitarray('01', 'little'),
  727. bitarray('11', 'big'))
  728. def test_overlap(self):
  729. n = 100
  730. for _ in range(500):
  731. i1 = randint(0, n)
  732. j1 = randint(i1, n)
  733. r1 = range(i1, j1)
  734. i2 = randint(0, n)
  735. j2 = randint(i2, n)
  736. r2 = range(i2, j2)
  737. # test if ranges r1 and r2 overlap
  738. res1 = bool(r1) and bool(r2) and (i2 in r1 or i1 in r2)
  739. res2 = bool(set(r1) & set(r2))
  740. self.assertEqual(res1, res2)
  741. a1, a2 = bitarray(n), bitarray(n)
  742. a1[i1:j1] = a2[i2:j2] = 1
  743. self.assertEqual(any_and(a1, a2), res1)
  744. def test_common(self):
  745. n = 100
  746. for _ in range(500):
  747. s1 = self.random_slice(n)
  748. s2 = self.random_slice(n)
  749. r1 = range(n)[s1]
  750. r2 = range(n)[s2]
  751. # test if ranges r1 and r2 have common items
  752. a1, a2 = bitarray(n), bitarray(n)
  753. a1[s1] = a2[s2] = 1
  754. self.assertEqual(any_and(a1, a2), bool(set(r1) & set(r2)))
  755. def check(self, a, b):
  756. r = any_and(a, b)
  757. self.assertEqual(type(r), bool)
  758. self.assertEqual(r, any_and(b, a)) # symmetry
  759. self.assertEqual(r, any(a & b))
  760. self.assertEqual(r, (a & b).any())
  761. self.assertEqual(r, count_and(a, b) > 0)
  762. def test_explitcit(self):
  763. for a, b , res in [
  764. ('', '', False),
  765. ('0', '1', False),
  766. ('0', '0', False),
  767. ('1', '1', True),
  768. ('00011', '11100', False),
  769. ('00001011 1', '01000100 1', True)]:
  770. a = bitarray(a)
  771. b = bitarray(b)
  772. self.assertTrue(any_and(a, b) is res)
  773. self.check(a, b)
  774. def test_random(self):
  775. for a in self.randombitarrays():
  776. n = len(a)
  777. b = urandom(n, a.endian)
  778. self.check(a, b)
  779. def test_one(self):
  780. for n in range(1, 300):
  781. a = zeros(n)
  782. b = urandom(n)
  783. i = randrange(n)
  784. a[i] = 1
  785. self.assertEqual(b[i], any_and(a, b))
  786. # ---------------------------- subset() -----------------------------------
  787. class SubsetTests(unittest.TestCase, Util):
  788. def test_basic(self):
  789. a = frozenbitarray('0101')
  790. b = bitarray('0111')
  791. self.assertTrue(subset(a, b))
  792. self.assertFalse(subset(b, a))
  793. self.assertRaises(TypeError, subset)
  794. self.assertRaises(TypeError, subset, a, '')
  795. self.assertRaises(TypeError, subset, '1', b)
  796. self.assertRaises(TypeError, subset, a, 4)
  797. b.append(1)
  798. self.assertRaises(ValueError, subset, a, b)
  799. self.assertRaises(ValueError, subset,
  800. bitarray('01', 'little'),
  801. bitarray('11', 'big'))
  802. def check(self, a, b, res):
  803. r = subset(a, b)
  804. self.assertEqual(type(r), bool)
  805. self.assertEqual(r, res)
  806. self.assertEqual(a | b == b, res)
  807. self.assertEqual(a & b == a, res)
  808. def test_True(self):
  809. for a, b in [('', ''), ('0', '1'), ('0', '0'), ('1', '1'),
  810. ('000', '111'), ('0101', '0111'),
  811. ('000010111', '010011111')]:
  812. self.check(bitarray(a), bitarray(b), True)
  813. def test_False(self):
  814. for a, b in [('1', '0'), ('1101', '0111'),
  815. ('0000101111', '0100111011')]:
  816. self.check(bitarray(a), bitarray(b), False)
  817. def test_random(self):
  818. for a in self.randombitarrays(start=1):
  819. b = a.copy()
  820. # we set one random bit in b to 1, so a is always a subset of b
  821. b[randrange(len(a))] = 1
  822. self.check(a, b, True)
  823. # but b is only a subset when they are equal
  824. self.check(b, a, a == b)
  825. # we set all bits in a, which ensures that b is a subset of a
  826. a.setall(1)
  827. self.check(b, a, True)
  828. # ------------------------- correspond_all() ------------------------------
  829. class CorrespondAllTests(unittest.TestCase):
  830. def test_basic(self):
  831. a = frozenbitarray('0101')
  832. b = bitarray('0111')
  833. self.assertTrue(correspond_all(a, b), (1, 1, 1, 1))
  834. self.assertRaises(TypeError, correspond_all)
  835. b.append(1)
  836. self.assertRaises(ValueError, correspond_all, a, b)
  837. self.assertRaises(ValueError, correspond_all,
  838. bitarray('01', 'little'),
  839. bitarray('11', 'big'))
  840. def test_explitcit(self):
  841. for a, b, res in [
  842. ('', '', (0, 0, 0, 0)),
  843. ('0000011111',
  844. '0000100111', (4, 1, 2, 3)),
  845. ]:
  846. self.assertEqual(correspond_all(bitarray(a), bitarray(b)), res)
  847. def test_random(self):
  848. for _ in range(100):
  849. n = randrange(3000)
  850. a = urandom_2(n)
  851. b = urandom(n, a.endian)
  852. res = correspond_all(a, b)
  853. self.assertEqual(res[0], count_and(~a, ~b))
  854. self.assertEqual(res[1], count_and(~a, b))
  855. self.assertEqual(res[2], count_and(a, ~b))
  856. self.assertEqual(res[3], count_and(a, b))
  857. self.assertEqual(res[0], n - count_or(a, b))
  858. self.assertEqual(res[1] + res[2], count_xor(a, b))
  859. self.assertEqual(sum(res), n)
  860. # ----------------------------- byteswap() --------------------------------
  861. @skipIf(is_pypy)
  862. class ByteSwapTests(unittest.TestCase):
  863. def test_basic_bytearray(self):
  864. a = bytearray(b"ABCD")
  865. byteswap(a, 2)
  866. self.assertEqual(a, bytearray(b"BADC"))
  867. byteswap(a)
  868. self.assertEqual(a, bytearray(b"CDAB"))
  869. a = bytearray(b"ABCDEF")
  870. byteswap(a, 3)
  871. self.assertEqual(a, bytearray(b"CBAFED"))
  872. byteswap(a, 1)
  873. self.assertEqual(a, bytearray(b"CBAFED"))
  874. def test_basic_bitarray(self):
  875. a = bitarray("11110000 01010101")
  876. byteswap(a)
  877. self.assertEqual(a, bitarray("01010101 11110000"))
  878. a = bitarray("01111000 1001")
  879. b = a.copy()
  880. a.tobytes() # clear padbits
  881. byteswap(a)
  882. self.assertEqual(a, bitarray("10010000 0111"))
  883. byteswap(a)
  884. self.assertEqual(a, b)
  885. def test_basic_array(self):
  886. r = os.urandom(64)
  887. for typecode in array.typecodes:
  888. # type code 'u' is deprecated and will be removed in Python 3.16
  889. if typecode == 'u':
  890. continue
  891. a = array.array(typecode, r)
  892. self.assertEqual(len(a) * a.itemsize, 64)
  893. a.byteswap()
  894. byteswap(a, a.itemsize)
  895. self.assertEqual(a.tobytes(), r)
  896. def test_empty(self):
  897. a = bytearray()
  898. byteswap(a)
  899. self.assertEqual(a, bytearray())
  900. for n in range(10):
  901. byteswap(a, n)
  902. self.assertEqual(a, bytearray())
  903. def test_one_byte(self):
  904. a = bytearray(b'\xab')
  905. byteswap(a)
  906. self.assertEqual(a, bytearray(b'\xab'))
  907. for n in range(2):
  908. byteswap(a, n)
  909. self.assertEqual(a, bytearray(b'\xab'))
  910. def test_errors(self):
  911. # buffer not writable
  912. for a in b"AB", frozenbitarray(16):
  913. self.assertRaises(BufferError, byteswap, a)
  914. a = bytearray(b"ABCD")
  915. b = bitarray(32)
  916. for n in -1, 3, 5, 6:
  917. # byte size not multiple of n
  918. self.assertRaises(ValueError, byteswap, a, n)
  919. self.assertRaises(ValueError, byteswap, b, n)
  920. def test_range(self):
  921. for n in range(20):
  922. for m in range(20):
  923. r = os.urandom(m * n)
  924. a = bytearray(r)
  925. byteswap(a, n)
  926. lst = []
  927. for i in range(m):
  928. x = r[i * n:i * n + n]
  929. lst.extend(x[::-1])
  930. self.assertEqual(a, bytearray(lst))
  931. def test_reverse_bytearray(self):
  932. for n in range(100):
  933. r = os.urandom(n)
  934. a = bytearray(r)
  935. byteswap(a)
  936. self.assertEqual(a, bytearray(r[::-1]))
  937. def test_reverse_bitarray(self):
  938. for n in range(100):
  939. a = urandom(8 * n)
  940. b = a.copy()
  941. byteswap(a)
  942. a.bytereverse()
  943. self.assertEqual(a, b[::-1])
  944. # ------------------------------ parity() ---------------------------------
  945. class ParityTests(unittest.TestCase):
  946. def test_explitcit(self):
  947. for s, res in [('', 0), ('1', 1), ('0010011', 1), ('10100110', 0)]:
  948. self.assertTrue(parity(bitarray(s)) is res)
  949. self.assertTrue(parity(frozenbitarray(s)) is res)
  950. def test_zeros_ones(self):
  951. for n in range(2000):
  952. self.assertEqual(parity(zeros(n)), 0)
  953. self.assertEqual(parity(ones(n)), n % 2)
  954. def test_random(self):
  955. endian = choice(["little", "big"])
  956. a = bitarray(endian=endian)
  957. par = 0
  958. for i in range(2000):
  959. self.assertEqual(parity(a), par)
  960. self.assertEqual(par, a.count() % 2)
  961. self.assertEqual(a.endian, endian)
  962. self.assertEqual(len(a), i)
  963. v = getrandbits(1)
  964. a.append(v)
  965. par ^= v
  966. def test_wrong_args(self):
  967. self.assertRaises(TypeError, parity, '')
  968. self.assertRaises(TypeError, parity, 1)
  969. self.assertRaises(TypeError, parity)
  970. self.assertRaises(TypeError, parity, bitarray("110"), 1)
  971. # ---------------------------- sum_indices() ------------------------------
  972. class SumIndicesUtil(unittest.TestCase):
  973. def check_explicit(self, S):
  974. for s, r1, r2 in [
  975. ("", 0, 0), ("0", 0, 0), ("1", 0, 0), ("11", 1, 1),
  976. ("011", 3, 5), ("001", 2, 4), ("0001100", 7, 25),
  977. ("00001111", 22, 126), ("01100111 1101", 49, 381),
  978. ]:
  979. for a in [bitarray(s, choice(['little', 'big'])),
  980. frozenbitarray(s, choice(['little', 'big']))]:
  981. self.assertEqual(S(a, 1), r1)
  982. self.assertEqual(S(a, 2), r2)
  983. self.assertEqual(a, bitarray(s))
  984. def check_wrong_args(self, S):
  985. self.assertRaises(TypeError, S, '')
  986. self.assertRaises(TypeError, S, 1.0)
  987. self.assertRaises(TypeError, S)
  988. for mode in -1, 0, 3, 4:
  989. self.assertRaises(ValueError, S, bitarray("110"), mode)
  990. def check_urandom(self, S, n):
  991. a = urandom_2(n)
  992. self.assertEqual(S(a, 1), sum(i for i, v in enumerate(a) if v))
  993. self.assertEqual(S(a, 2), sum(i * i for i, v in enumerate(a) if v))
  994. def check_sparse(self, S, n, k, mode=1, freeze=False, inv=False):
  995. a = zeros(n, choice(['little', 'big']))
  996. self.assertEqual(S(a, mode), 0)
  997. self.assertFalse(a.any())
  998. indices = sample(range(n), k)
  999. a[indices] = 1
  1000. res = sum(indices) if mode == 1 else sum(i * i for i in indices)
  1001. if inv:
  1002. a.invert()
  1003. sum_ones = 3 if mode == 1 else 2 * n - 1
  1004. sum_ones *= n * (n - 1)
  1005. sum_ones //= 6
  1006. res = sum_ones - res
  1007. if freeze:
  1008. a = frozenbitarray(a)
  1009. c = a.copy()
  1010. self.assertEqual(a.count(), n - k if inv else k)
  1011. self.assertEqual(S(a, mode), res)
  1012. self.assertEqual(a, c)
  1013. class SSQI_Tests(SumIndicesUtil):
  1014. # Additional tests for _ssqi() in: devel/test_sum_indices.py
  1015. def test_explicit(self):
  1016. self.check_explicit(_ssqi)
  1017. def test_wrong_args(self):
  1018. self.check_wrong_args(_ssqi)
  1019. def test_small(self):
  1020. a = bitarray()
  1021. sm1 = sm2 = 0
  1022. for i in range(100):
  1023. v = getrandbits(1)
  1024. a.append(v)
  1025. if v:
  1026. sm1 += i
  1027. sm2 += i * i
  1028. self.assertEqual(_ssqi(a, 1), sm1)
  1029. self.assertEqual(_ssqi(a, 2), sm2)
  1030. def test_urandom(self):
  1031. self.check_urandom(_ssqi, 10_037)
  1032. def test_sparse(self):
  1033. for _ in range(5):
  1034. mode = randint(1, 2)
  1035. freeze = getrandbits(1)
  1036. inv = getrandbits(1)
  1037. self.check_sparse(_ssqi, n=1_000_003, k=400,
  1038. mode=mode, freeze=freeze, inv=inv)
  1039. class SumIndicesTests(SumIndicesUtil):
  1040. # Additional tests in: devel/test_sum_indices.py
  1041. def test_explicit(self):
  1042. self.check_explicit(sum_indices)
  1043. a = gen_primes(100)
  1044. self.assertEqual(sum_indices(a, mode=1), 1_060)
  1045. self.assertEqual(sum_indices(a, mode=2), 65_796)
  1046. def test_wrong_args(self):
  1047. self.check_wrong_args(sum_indices)
  1048. def test_ones(self):
  1049. for mode in 1, 2:
  1050. self.check_sparse(sum_indices, n=1_600_037, k=0,
  1051. mode=mode, freeze=True, inv=True)
  1052. def test_sparse(self):
  1053. for _ in range(20):
  1054. n = choice([500_029, 600_011]) # below and above block size
  1055. k = randrange(1_000)
  1056. mode = randint(1, 2)
  1057. freeze = getrandbits(1)
  1058. inv = getrandbits(1)
  1059. self.check_sparse(sum_indices, n, k, mode, freeze, inv)
  1060. # ---------------------------------------------------------------------------
  1061. class XoredIndicesTests(unittest.TestCase, Util):
  1062. def test_explicit(self):
  1063. for s, r in [("", 0), ("0", 0), ("1", 0), ("11", 1),
  1064. ("011", 3), ("001", 2), ("0001100", 7),
  1065. ("01100111 1101", 13)]:
  1066. for a in [bitarray(s, self.random_endian()),
  1067. frozenbitarray(s, self.random_endian())]:
  1068. self.assertEqual(xor_indices(a), r)
  1069. def test_wrong_args(self):
  1070. X = xor_indices
  1071. self.assertRaises(TypeError, X, '')
  1072. self.assertRaises(TypeError, X, 1)
  1073. self.assertRaises(TypeError, X)
  1074. self.assertRaises(TypeError, X, bitarray("110"), 1)
  1075. def test_ones(self):
  1076. # OEIS A003815
  1077. lst = [0, 1, 3, 0, 4, 1, 7, 0, 8, 1, 11, 0, 12, 1, 15, 0, 16, 1, 19]
  1078. self.assertEqual([xor_indices(ones(i)) for i in range(1, 20)], lst)
  1079. a = bitarray()
  1080. x = 0
  1081. for i in range(1000):
  1082. a.append(1)
  1083. x ^= i
  1084. self.assertEqual(xor_indices(a), x)
  1085. if i < 19:
  1086. self.assertEqual(lst[i], x)
  1087. def test_primes(self):
  1088. # OEIS A126084
  1089. lst = [0, 2, 1, 4, 3, 8, 5, 20, 7, 16, 13, 18, 55, 30, 53, 26, 47]
  1090. primes = gen_primes(1000)
  1091. x = 0
  1092. for i, p in enumerate(primes.search(1)):
  1093. self.assertEqual(xor_indices(primes[:p]), x)
  1094. if i < 17:
  1095. self.assertEqual(lst[i], x)
  1096. x ^= p
  1097. def test_large_random(self):
  1098. n = 10_037
  1099. for a in [urandom_2(n), frozenbitarray(urandom_2(n))]:
  1100. res = reduce(operator.xor, (i for i, v in enumerate(a) if v))
  1101. b = a.copy()
  1102. self.assertEqual(xor_indices(a), res)
  1103. self.assertEqual(a, b)
  1104. def test_random(self):
  1105. for a in self.randombitarrays():
  1106. c = 0
  1107. for i, v in enumerate(a):
  1108. c ^= i * v
  1109. self.assertEqual(xor_indices(a), c)
  1110. def test_flips(self):
  1111. a = bitarray(128)
  1112. c = 0
  1113. for _ in range(1000):
  1114. self.assertEqual(xor_indices(a), c)
  1115. i = randrange(len(a))
  1116. a.invert(i)
  1117. c ^= i
  1118. def test_error_correct(self):
  1119. parity_bits = [1, 2, 4, 8, 16, 32, 64, 128] # parity bit positions
  1120. a = urandom(256)
  1121. a[parity_bits] = 0
  1122. c = xor_indices(a)
  1123. # set parity bits such that block is well prepared
  1124. a[parity_bits] = int2ba(c, length=8, endian="little")
  1125. for i in range(0, 256):
  1126. self.assertEqual(xor_indices(a), 0) # ensure well prepared
  1127. a.invert(i)
  1128. self.assertEqual(xor_indices(a), i) # index of the flipped bit!
  1129. a.invert(i)
  1130. # ------------------ intervals of uninterrupted runs --------------------
  1131. def runs(a):
  1132. "return number of uninterrupted intervals of 1s and 0s"
  1133. n = len(a)
  1134. if n < 2:
  1135. return n
  1136. return 1 + count_xor(a[:-1], a[1:])
  1137. class IntervalsTests(unittest.TestCase, Util):
  1138. def test_explicit(self):
  1139. for s, lst in [
  1140. ('', []),
  1141. ('0', [(0, 0, 1)]),
  1142. ('1', [(1, 0, 1)]),
  1143. ('00111100 0000011',
  1144. [(0, 0, 2), (1, 2, 6), (0, 6, 13), (1, 13, 15)]),
  1145. ]:
  1146. a = bitarray(s)
  1147. self.assertEqual(list(intervals(a)), lst)
  1148. self.assertEqual(runs(a), len(lst))
  1149. def test_uniform(self):
  1150. for n in range(1, 100):
  1151. for v in 0, 1:
  1152. a = n * bitarray([v], self.random_endian())
  1153. self.assertEqual(list(intervals(a)), [(v, 0, n)])
  1154. self.assertEqual(runs(a), 1)
  1155. def test_random(self):
  1156. for a in self.randombitarrays():
  1157. n = len(a)
  1158. b = urandom(n)
  1159. for value, start, stop in intervals(a):
  1160. self.assertFalse(isinstance(value, bool))
  1161. self.assertTrue(0 <= start < stop <= n)
  1162. b[start:stop] = value
  1163. self.assertEqual(a, b)
  1164. def test_list_runs(self):
  1165. for a in self.randombitarrays():
  1166. # list of length of runs of alternating bits
  1167. alt_runs = [stop - start for _, start, stop in intervals(a)]
  1168. self.assertEqual(len(alt_runs), runs(a))
  1169. b = bitarray()
  1170. v = a[0] if a else None # value of first run
  1171. for length in alt_runs:
  1172. self.assertTrue(length > 0)
  1173. b.extend(length * bitarray([v]))
  1174. v = not v
  1175. self.assertEqual(a, b)
  1176. # -------------------------- ba2hex() hex2ba() ---------------------------
  1177. class HexlifyTests(unittest.TestCase, Util):
  1178. def test_explicit(self):
  1179. data = [ # little big
  1180. ('', '', ''),
  1181. ('1000', '1', '8'),
  1182. ('0101 0110', 'a6', '56'),
  1183. ('0100 1001 1101', '29b', '49d'),
  1184. ('0000 1100 1110 1111', '037f', '0cef'),
  1185. ]
  1186. for bs, hex_le, hex_be in data:
  1187. a_be = bitarray(bs, 'big')
  1188. a_le = bitarray(bs, 'little')
  1189. self.assertEQUAL(hex2ba(hex_be, 'big'), a_be)
  1190. self.assertEQUAL(hex2ba(hex_le, 'little'), a_le)
  1191. self.assertEqual(ba2hex(a_be), hex_be)
  1192. self.assertEqual(ba2hex(a_le), hex_le)
  1193. def test_ba2hex_group(self):
  1194. a = bitarray('1000 0000 0101 1111', 'little')
  1195. self.assertEqual(ba2hex(a), "10af")
  1196. self.assertEqual(ba2hex(a, 0), "10af")
  1197. self.assertEqual(ba2hex(a, 1, ""), "10af")
  1198. self.assertEqual(ba2hex(a, 1), "1 0 a f")
  1199. self.assertEqual(ba2hex(a, group=2), "10 af")
  1200. self.assertEqual(ba2hex(a, 2, "-"), "10-af")
  1201. self.assertEqual(ba2hex(a, group=3, sep="_"), "10a_f")
  1202. self.assertEqual(ba2hex(a, 3, sep=", "), "10a, f")
  1203. def test_ba2hex_errors(self):
  1204. self.assertRaises(TypeError, ba2hex)
  1205. self.assertRaises(TypeError, ba2hex, None)
  1206. self.assertRaises(TypeError, ba2hex, '101')
  1207. # length not multiple of 4
  1208. self.assertRaises(ValueError, ba2hex, bitarray('10'))
  1209. a = bitarray('1000 0000 0101 1111', 'little')
  1210. self.assertRaises(ValueError, ba2hex, a, -1)
  1211. self.assertRaises(ValueError, ba2hex, a, group=-1)
  1212. # sep not str
  1213. self.assertRaises(TypeError, ba2hex, a, 1, b" ")
  1214. # embedded null character in sep
  1215. self.assertRaises(ValueError, ba2hex, a, 2, " \0")
  1216. def test_hex2ba_whitespace(self):
  1217. _set_default_endian('big')
  1218. self.assertEqual(hex2ba("F1 FA %s f3 c0" % whitespace),
  1219. bitarray("11110001 11111010 11110011 11000000"))
  1220. self.assertEQUAL(hex2ba(b' a F ', 'big'),
  1221. bitarray('1010 1111', 'big'))
  1222. self.assertEQUAL(hex2ba(860 * " " + '0 1D' + 590 * " ", 'little'),
  1223. bitarray('0000 1000 1011', 'little'))
  1224. def test_hex2ba_errors(self):
  1225. self.assertRaises(TypeError, hex2ba, 0)
  1226. self.assertRaises(TypeError, hex2ba, "F", 1)
  1227. self.assertRaises(ValueError, hex2ba, "F", "foo")
  1228. for s in '01a7g89', '0\u20ac', '0 \0', b'\x00':
  1229. self.assertRaises(ValueError, hex2ba, s)
  1230. for s in 'g', 'ag', 'aag' 'aaaga', 'ag':
  1231. msg = "invalid digit found for base16, got 'g' (0x67)"
  1232. self.assertRaisesMessage(ValueError, msg, hex2ba, s, 'big')
  1233. def test_hex2ba_types(self):
  1234. for c in 'e', 'E', b'e', b'E', bytearray(b'e'), bytearray(b'E'):
  1235. a = hex2ba(c, "big")
  1236. self.assertEqual(a.to01(), '1110')
  1237. self.assertEqual(a.endian, 'big')
  1238. self.assertEqual(type(a), bitarray)
  1239. def test_random(self):
  1240. for _ in range(100):
  1241. default_endian = self.random_endian()
  1242. _set_default_endian(default_endian)
  1243. endian = choice(["little", "big", None])
  1244. a = urandom_2(4 * randrange(100), endian)
  1245. s = ba2hex(a, group=randrange(10), sep=choice(whitespace))
  1246. b = hex2ba(s, endian)
  1247. self.assertEqual(b.endian, endian or default_endian)
  1248. self.assertEqual(a, b)
  1249. self.check_obj(b)
  1250. def test_hexdigits(self):
  1251. a = hex2ba(hexdigits)
  1252. self.assertEqual(len(a), 4 * len(hexdigits))
  1253. self.assertEqual(type(a), bitarray)
  1254. self.check_obj(a)
  1255. t = ba2hex(a)
  1256. self.assertEqual(t, hexdigits.lower())
  1257. self.assertEqual(type(t), str)
  1258. self.assertEQUAL(a, hex2ba(t))
  1259. def test_binascii(self):
  1260. a = urandom(80, 'big')
  1261. s = binascii.hexlify(a.tobytes()).decode()
  1262. self.assertEqual(ba2hex(a), s)
  1263. b = bitarray(binascii.unhexlify(s), endian='big')
  1264. self.assertEQUAL(hex2ba(s, 'big'), b)
  1265. # -------------------------- ba2base() base2ba() -------------------------
  1266. class BaseTests(unittest.TestCase, Util):
  1267. def test_explicit(self):
  1268. data = [ # n little big
  1269. ('', 2, '', ''),
  1270. ('1 0 1', 2, '101', '101'),
  1271. ('11 01 00', 4, '320', '310'),
  1272. ('111 001', 8, '74', '71'),
  1273. ('1111 0001', 16, 'f8', 'f1'),
  1274. ('11111 00001', 32, '7Q', '7B'),
  1275. ('111111 000001', 64, '/g', '/B'),
  1276. ]
  1277. for bs, n, s_le, s_be in data:
  1278. a_le = bitarray(bs, 'little')
  1279. a_be = bitarray(bs, 'big')
  1280. self.assertEQUAL(base2ba(n, s_le, 'little'), a_le)
  1281. self.assertEQUAL(base2ba(n, s_be, 'big'), a_be)
  1282. self.assertEqual(ba2base(n, a_le), s_le)
  1283. self.assertEqual(ba2base(n, a_be), s_be)
  1284. def test_base2ba_types(self):
  1285. for c in '7', b'7', bytearray(b'7'):
  1286. a = base2ba(32, c)
  1287. self.assertEqual(a.to01(), '11111')
  1288. self.assertEqual(type(a), bitarray)
  1289. def test_base2ba_whitespace(self):
  1290. self.assertEqual(base2ba(8, bytearray(b"17 0"), "little"),
  1291. bitarray("100 111 000"))
  1292. self.assertEqual(base2ba(32, "7 A"), bitarray("11111 00000"))
  1293. self.assertEqual(base2ba(64, b"A /"), bitarray("000000 111111"))
  1294. for n in 2, 4, 8, 16, 32, 64:
  1295. a = base2ba(n, whitespace)
  1296. self.assertEqual(a, bitarray())
  1297. a = urandom(60)
  1298. c = list(ba2base(n, a))
  1299. for _ in range(randrange(80)):
  1300. c.insert(randint(0, len(c)), choice(whitespace))
  1301. s = ''.join(c)
  1302. self.assertEqual(base2ba(n, s), a)
  1303. def test_ba2base_group(self):
  1304. a = bitarray("001 011 100 111", "little")
  1305. self.assertEqual(ba2base(8, a, 3), "461 7")
  1306. self.assertEqual(ba2base(8, a, group=2), "46 17")
  1307. self.assertEqual(ba2base(8, a, sep="_", group=2), "46_17")
  1308. self.assertEqual(ba2base(8, a, 2, sep="."), "46.17")
  1309. for n, s, group, sep, res in [
  1310. (2, '10100', 2, '-', '10-10-0'),
  1311. (4, '10 11 00 01', 1, "_", "2_3_0_1"),
  1312. (8, "101 100 011 101 001 010", 3, " ", "543 512"),
  1313. (8, "101 100 011 101 001 010", 3, "", "543512"),
  1314. (16, '1011 0001 1101 1010 1111', 4, "+", "b1da+f"),
  1315. (32, "10110 00111 01101 01111", 2, ", ", "WH, NP"),
  1316. (64, "101100 011101 101011 111110 101110", 2, ".", "sd.r+.u"),
  1317. ]:
  1318. a = bitarray(s, "big")
  1319. s = ba2base(n, a, group, sep)
  1320. self.assertEqual(type(s), str)
  1321. self.assertEqual(s, res)
  1322. def test_empty(self):
  1323. for n in 2, 4, 8, 16, 32, 64:
  1324. a = base2ba(n, '')
  1325. self.assertEqual(a, bitarray())
  1326. self.assertEqual(ba2base(n, a), '')
  1327. def test_invalid_characters(self):
  1328. for n, s in ((2, '2'), (4, '4'), (8, '8'), (16, 'g'), (32, '8'),
  1329. (32, '1'), (32, 'a'), (64, '-'), (64, '_')):
  1330. msg = ("invalid digit found for base%d, "
  1331. "got '%s' (0x%02x)" % (n, s, ord(s)))
  1332. self.assertRaisesMessage(ValueError, msg, base2ba, n, s)
  1333. for n in 2, 4, 8, 16, 32, 64:
  1334. for s in '_', '@', '[', '\u20ac', '\0', b'\0', b'\x80', b'\xff':
  1335. self.assertRaises(ValueError, base2ba, n, s)
  1336. msg = "invalid digit found for base%d, got '{' (0x7b)" % n
  1337. self.assertRaisesMessage(ValueError, msg, base2ba, n, '{')
  1338. def test_invalid_args(self):
  1339. a = bitarray()
  1340. self.assertRaises(TypeError, ba2base, None, a)
  1341. self.assertRaises(TypeError, base2ba, None, '')
  1342. self.assertRaises(TypeError, ba2base, 16.0, a)
  1343. self.assertRaises(TypeError, base2ba, 16.0, '')
  1344. self.assertRaises(TypeError, ba2base, 32, None)
  1345. self.assertRaises(TypeError, base2ba, 32, None)
  1346. for values, msg in [
  1347. ([-1023, -16, -1, 0, 3, 5, 31, 48, 63, 129, 511, 4123],
  1348. "base must be a power of 2"),
  1349. ([1, 128, 256, 512, 1024, 2048, 4096, 8192],
  1350. "base must be 2, 4, 8, 16, 32 or 64")]:
  1351. for i in values:
  1352. self.assertRaisesMessage(ValueError, msg, ba2base, i, a)
  1353. self.assertRaisesMessage(ValueError, msg, base2ba, i, '')
  1354. a = bitarray(29)
  1355. for m in range(2, 7):
  1356. msg = "bitarray length 29 not multiple of %d" % m
  1357. self.assertRaisesMessage(ValueError, msg, ba2base, 1 << m, a)
  def test_hexadecimal(self):
      """Base 16 conversions agree with hex2ba() and ba2hex()."""
      fixed = base2ba(16, 'F61', 'big')
      self.assertEqual(fixed, bitarray('1111 0110 0001'))
      # uppercase input is accepted, output is lowercase
      self.assertEqual(ba2base(16, fixed), 'f61')

      for length in range(50):
          digits = ''.join(choices(hexdigits, k=length))
          endian = self.random_endian()
          converted = base2ba(16, digits, endian)
          self.assertEQUAL(converted, hex2ba(digits, endian))
          self.assertEqual(ba2base(16, converted), ba2hex(converted))
  def test_base32(self):
      """Base 32 on big-endian bits matches base64.b32encode()."""
      # payload size is a multiple of 5 bytes (40 bits = 8 digits)
      payload = os.urandom(randint(10, 100) * 5)
      text = base64.b32encode(payload).decode()

      bits = base2ba(32, text, 'big')
      self.assertEqual(bits.tobytes(), payload)
      self.assertEqual(ba2base(32, bits), text)
      # sanity check of the reference encoding itself
      self.assertEqual(base64.b32decode(text), payload)
  def test_base64(self):
      """Base 64 on big-endian bits matches base64.standard_b64encode()."""
      # payload size is a multiple of 3 bytes (24 bits = 4 digits)
      payload = os.urandom(randint(10, 100) * 3)
      text = base64.standard_b64encode(payload).decode()

      bits = base2ba(64, text, 'big')
      self.assertEqual(bits.tobytes(), payload)
      self.assertEqual(ba2base(64, bits), text)
      # sanity check of the reference encoding itself
      self.assertEqual(base64.standard_b64decode(text), payload)
  def test_primes(self):
      """Check fixed representations of gen_primes(60, odd=True)."""
      primes = gen_primes(60, odd=True)
      base_2 = primes.to01()
      # (base, bit-endianness, expected representation)
      expected = [
          (2, "little", base_2),
          (2, "big", base_2),
          (4, "little", "232132030132012122122010132110"),
          (4, "big", "131231030231021211211020231220"),
          (8, "little", "65554155441515405550"),
          (8, "big", "35551455114545105550"),
          (16, "little", "e6bc4b46a921d61"),
          (16, "big", "76d32d265948b68"),
          (32, "little", "O3SJLSJTSI3C"),
          (32, "big", "O3JS2JSZJC3I"),
          (64, "little", "utMtkppEtF"),
          (64, "big", "dtMtJllIto"),
      ]
      for base, endian, rep in expected:
          source = bitarray(primes, endian)

          # bitarray -> string
          text = ba2base(base, source)
          self.assertEqual(type(text), str)
          self.assertEqual(text, rep)

          # string -> bitarray
          back = base2ba(base, rep, endian)
          self.assertEqual(back, source)
          self.assertEqual(type(back), bitarray)
          self.assertEqual(back.endian, endian)
  # Each entry is (m, n, alphabet): n = 2**m is the base and alphabet
  # holds its n digits, in order of increasing value.  Base 16 is
  # listed twice, once with lowercase and once with uppercase digits.
  alphabets = [
      (1, 2, '01'),
      (2, 4, '0123'),
      (3, 8, '01234567'),
      (4, 16, '0123456789abcdef'),
      (4, 16, '0123456789ABCDEF'),
      (5, 32, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' '234567'),
      (6, 64, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
              'abcdefghijklmnopqrstuvwxyz'
              '0123456789+/'),
  ]
  def test_alphabets(self):
      """Every digit of every alphabet maps to its index and back."""
      for m, n, alphabet in self.alphabets:
          self.assertEqual(1 << m, n)
          self.assertEqual(len(alphabet), n)
          for value, digit in enumerate(alphabet):
              endian = self.random_endian()
              self.assertEqual(ba2int(base2ba(n, digit, endian)), value)

              # uppercase hexadecimal digits come back in lowercase
              expected = digit
              if m == 4 and digit in "ABCDEF":
                  expected = digit.lower()
              self.assertEqual(ba2base(n, int2ba(value, m, endian)),
                               expected)
  def test_not_alphabets(self):
      """Characters outside the alphabet (except whitespace) raise."""
      for _, n, alphabet in self.alphabets:
          for code in range(256):
              char = chr(code)
              # digits and whitespace are skipped; for base 16 both
              # cases of the hexadecimal digits are skipped
              skipped = (char in alphabet or char.isspace() or
                         (n == 16 and char in hexdigits))
              if not skipped:
                  self.assertRaises(ValueError, base2ba, n, char)
  def test_random(self):
      """Round trip random bitarrays through ba2base() and base2ba()."""
      for _ in range(100):
          m = randint(1, 6)
          original = urandom_2(m * randrange(100))
          base = 1 << m

          # random grouping, separated by zero to four spaces
          group = randrange(10)
          sep = randrange(5) * " "
          text = ba2base(base, original, group=group, sep=sep)

          # sometimes use uppercase hexadecimal digits
          if m == 4 and getrandbits(1):
              text = text.upper()
          # sometimes pass bytes instead of str
          if getrandbits(1):
              text = text.encode()

          decoded = base2ba(base, text, original.endian)
          self.assertEQUAL(original, decoded)
          self.check_obj(decoded)
  1450. # --------------------------- sparse compression ----------------------------
  class SC_Tests(unittest.TestCase, Util):
      """Tests for sc_encode() and sc_decode()."""

      def test_explicit(self):
          """Fixed blobs encode from, and decode to, the given bits."""
          table = [
              (b'\x00\0', '', 'little'),
              (b'\x01\x03\x01\x03\0', '110', 'little'),
              (b'\x01\x07\x01\x40\0', '0000001', 'little'),
              (b'\x11\x07\x01\x02\0', '0000001', 'big'),
              (b'\x01\x10\x02\xf0\x0f\0', '00001111 11110000', 'little'),
              (b'\x11\x10\xa1\x0c\0', '00000000 00001000', 'big'),
              (b'\x11\x09\xa1\x08\0', '00000000 1', 'big'),
              (b'\x01g\xa4abde\0', 97 * '0' + '110110', 'little'),
          ]
          for blob, bits, endian in table:
              expected = bitarray(bits, endian)
              self.assertEqual(sc_encode(expected), blob)
              self.assertEQUAL(sc_decode(blob), expected)

      def test_encode_types(self):
          """sc_encode() takes (frozen)bitarrays only and returns bytes."""
          for obj in bitarray('1', 'big'), frozenbitarray('1', 'big'):
              blob = sc_encode(obj)
              self.assertEqual(type(blob), bytes)
              self.assertEqual(blob, b'\x11\x01\x01\x80\0')

          for obj in None, [], 0, 123, b'', b'\x00', 3.14:
              self.assertRaises(TypeError, sc_encode, obj)

      def test_decode_types(self):
          """sc_decode() accepts different iterables of byte values."""
          blob = b'\x11\x03\x01\x20\0'
          inputs = (blob, bytearray(blob), list(blob),
                    array.array('B', blob))
          for stream in inputs:
              decoded = sc_decode(stream)
              self.assertEqual(type(decoded), bitarray)
              self.assertEqual(decoded.endian, 'big')
              self.assertEqual(decoded.to01(), '001')

          items = [17, 3, 1, 32, 0]
          self.assertEqual(sc_decode(items), bitarray("001"))
          # items outside range(256) are rejected
          for bad in 256, -1:
              items[-1] = bad
              self.assertRaises(ValueError, sc_decode, items)

          self.assertRaises(TypeError, sc_decode, [0x02, None])
          for obj in None, 3, 3.2, Ellipsis, 'foo':
              self.assertRaises(TypeError, sc_decode, obj)

      def test_decode_header_nbits(self):
          """Headers without any blocks decode to all-zero bitarrays."""
          table = [
              (b'\x00\0', 0),
              (b'\x01\x00\0', 0),
              (b'\x01\x01\0', 1),
              (b'\x02\x00\x00\0', 0),
              (b'\x02\x00\x01\0', 256),
              (b'\x03\x00\x00\x00\0', 0),
              (b'\x03\x00\x00\x01\0', 65536),
          ]
          for blob, nbits in table:
              decoded = sc_decode(blob)
              self.assertEqual(len(decoded), nbits)
              self.assertFalse(decoded.any())

      def test_decode_untouch(self):
          """Items following the stop byte are left in the iterator."""
          rest = iter(b'\x01\x03\x01\x03\0XYZ')
          self.assertEqual(sc_decode(rest), bitarray('110'))
          self.assertEqual(next(rest), ord('X'))

          rest = iter([0x11, 0x05, 0x01, 0xff, 0, None, 'foo'])
          self.assertEqual(sc_decode(rest), bitarray('11111'))
          self.assertTrue(next(rest) is None)
          self.assertEqual(next(rest), 'foo')

      def test_decode_header_errors(self):
          """Invalid header bytes and invalid block heads raise."""
          # invalid header
          for head in 0x20, 0x21, 0x40, 0x80, 0xc0, 0xf0, 0xff:
              expected = "invalid header: 0x%02x" % head
              self.assertRaisesMessage(ValueError, expected,
                                       sc_decode, [head])

          # invalid block head
          for head in 0xc0, 0xc1, 0xc5, 0xff:
              expected = "invalid block head: 0x%02x" % head
              self.assertRaisesMessage(ValueError, expected,
                                       sc_decode, [0x01, 0x10, head])

      def test_decode_header_overflow(self):
          """Length fields not fitting into Py_ssize_t raise."""
          expected = ("sizeof(Py_ssize_t) = %d: cannot read 9 bytes"
                      % PTRSIZE)
          self.assertRaisesMessage(OverflowError, expected,
                                   sc_decode, b'\x09' + 9 * b'\x00')

          expected = "read %d bytes got negative value: -1" % PTRSIZE
          self.assertRaisesMessage(ValueError, expected,
                                   sc_decode, [PTRSIZE] + PTRSIZE * [0xff])

          if PTRSIZE != 4:
              return

          # checks which only apply when sizeof(Py_ssize_t) is 4
          self.assertRaisesMessage(
              OverflowError,
              "sizeof(Py_ssize_t) = 4: cannot read 5 bytes",
              sc_decode, b'\x05' + 5 * b'\x00')
          self.assertRaisesMessage(
              ValueError,
              "read 4 bytes got negative value: -2147483648",
              sc_decode, b'\x04\x00\x00\x00\x80')

      def test_decode_errors(self):
          """Blocks not fitting into the declared length raise."""
          table = [
              # too many raw bytes
              ("decode error (raw): 0 + 2 > 1",
               b"\x01\x05\x02\xff\xff\0"),
              ("decode error (raw): 32 + 3 > 34",
               b"\x02\x0f\x01\xa0\x03\xff\xff\xff\0"),
              # sparse index too high
              ("decode error (n=1): 128 >= 128",
               b"\x01\x80\xa1\x80\0"),
              ("decode error (n=2): 512 >= 512",
               b"\x02\x00\x02\xc2\x01\x00\x02\0"),
              ("decode error (n=3): 32768 >= 32768",
               b"\x02\x00\x80\xc3\x01\x00\x80\x00\0"),
          ]
          for expected, blob in table:
              self.assertRaisesMessage(ValueError, expected,
                                       sc_decode, blob)

          # for 4 byte indices, the message depends on PTRSIZE
          by_ptrsize = {4: "read 4 bytes got negative value: -2147483648",
                        8: "decode error (n=4): 2147483648 >= 16"}
          self.assertRaisesMessage(
              ValueError, by_ptrsize[PTRSIZE],
              sc_decode, b"\x01\x10\xc4\x01\x00\x00\x00\x80\0")

          by_ptrsize = {4: "read 4 bytes got negative value: -1",
                        8: "decode error (n=4): 4294967295 >= 16"}
          self.assertRaisesMessage(
              ValueError, by_ptrsize[PTRSIZE],
              sc_decode, b"\x01\x10\xc4\x01\xff\xff\xff\xff\0")

      def test_decode_end_of_stream(self):
          """A stream ending prematurely raises StopIteration."""
          truncated = [b'', b'\x00', b'\x01', b'\x02\x77',
                       b'\x01\x04\x01', b'\x01\x04\xa1', b'\x01\x04\xa0']
          for blob in truncated:
              self.assertRaises(StopIteration, sc_decode, blob)

      def test_decode_ambiguity(self):
          """Different blobs may decode to the same bitarray."""
          blobs = [
              # raw:
              b'\x11\x03\x01\x20\0',    # this is what sc_encode gives us
              b'\x11\x03\x01\x3f\0',    # but we can set the pad bits to 1
              # sparse:
              b'\x11\x03\xa1\x02\0',                  # block type 1
              b'\x11\x03\xc2\x01\x02\x00\0',          # block type 2
              b'\x11\x03\xc3\x01\x02\x00\x00\0',      # block type 3
              b'\x11\x03\xc4\x01\x02\x00\x00\x00\0',  # block type 4
          ]
          for blob in blobs:
              decoded = sc_decode(blob)
              self.assertEqual(decoded.to01(), '001')

      def test_block_type0(self):
          """Raw blocks: exercise each block head in range(0x01, 0xa0)."""
          for head in range(0x01, 0xa0):
              if head <= 32:
                  nbytes = head
              else:
                  nbytes = 32 * (head - 31)
              nbits = 8 * nbytes
              a = ones(nbits, "little")

              if nbits < 256:
                  blob = bytearray([0x01, nbits])
              else:
                  blob = bytearray([0x02, nbits % 256, nbits // 256])
              blob.append(head)
              blob.extend(a.tobytes())
              blob.append(0)  # stop byte

              self.assertEqual(sc_decode(blob), a)
              self.assertEqual(sc_encode(a), blob)

      def test_block_type1(self):
          """Block type 1: one byte per index of a set bit."""
          a = bitarray(256, 'little')
          for _ in range(1, 32):
              a[getrandbits(8)] = 1
              blob = bytearray([0x02, 0x00, 0x01, 0xa0 + a.count()])
              # sorted indices with no duplicates
              blob.extend(list(a.search(1)))
              blob.append(0)  # stop byte

              self.assertEqual(sc_decode(blob), a)
              self.assertEqual(sc_encode(a), blob)

      def test_block_type2(self):
          """Block type 2: two bytes per index of a set bit."""
          a = bitarray(65536, 'little')
          for step in range(1, 256):
              a[getrandbits(16)] = 1
              blob = bytearray([0x03, 0x00, 0x00, 0x01, 0xc2, a.count()])
              for index in a.search(1):
                  blob.extend(struct.pack("<H", index))
              blob.append(0)  # stop byte

              self.assertEqual(sc_decode(blob), a)
              if step >= 250:
                  # We cannot compare for the highest populations, as for
                  # such high values sc_encode() may find better
                  # compression with type 1 blocks.
                  self.assertTrue(len(sc_encode(a)) <= len(blob))
              else:
                  self.assertEqual(sc_encode(a), blob)

      def test_block_type3(self):
          """Block type 3: three bytes per index of a set bit."""
          a = bitarray(16_777_216, 'little')
          a[choices(range(1 << 24), k=255)] = 1
          blob = bytearray([0x04, 0x00, 0x00, 0x00, 0x01, 0xc3, a.count()])
          for index in a.search(1):
              # lowest three bytes of the little-endian 32-bit index
              blob.extend(struct.pack("<I", index)[:3])
          blob.append(0)  # stop byte

          self.assertEqual(sc_decode(blob), a)
          self.assertEqual(sc_encode(a), blob)

      def test_block_type4(self):
          """Block type 4: four bytes per index of a set bit."""
          a = bitarray(1 << 26, 'little')
          # To understand why we cannot have a population larger than 5 for
          # an array size 4 times the size of a type 3 block, take a look
          # at the cost comparison in sc_encode_block().  (2 + 6 >= 2 * 4)
          indices = sorted(set(choices(range(len(a)), k=5)))
          a[indices] = 1

          blob = bytearray(b'\x04\x00\x00\x00\x04\xc4')
          blob.append(len(indices))
          for index in indices:
              blob.extend(struct.pack("<I", index))
          blob.append(0)  # stop byte

          self.assertEqual(sc_decode(blob), a)
          self.assertEqual(sc_encode(a), blob)

      def test_decode_random_bytes(self):
          """Random bytes after a valid header either decode or raise."""
          # ensure random input doesn't crash the decoder
          for _ in range(100):
              size = randrange(20)
              blob = b'\x02\x00\x04' + os.urandom(size)
              try:
                  decoded = sc_decode(blob)
              except (StopIteration, ValueError):
                  continue
              self.assertEqual(len(decoded), 1024)
              self.assertEqual(decoded.endian, 'little')

      def check_blob_length(self, a, m):
          """Assert that a encodes to m bytes which decode back to a."""
          encoded = sc_encode(a)
          self.assertEqual(len(encoded), m)
          self.assertEqual(sc_decode(encoded), a)

      def test_encode_zeros(self):
          """Blob sizes for all zeros, and for only the first bit set."""
          for exponent in range(26):
              nbits = 1 << exponent
              a = zeros(nbits)
              size = 2  # head byte and stop byte
              size += bits2bytes(nbits.bit_length())  # size of n in bytes
              self.check_blob_length(a, size)

              a[0] = 1
              size += 2  # block head byte and one index byte
              size += 2 * bool(exponent > 9)  # count and 2nd index byte
              size += bool(exponent > 16)  # third index byte
              size += bool(exponent > 24)  # fourth index byte
              self.check_blob_length(a, size)

      def test_encode_ones(self):
          """Blob sizes for bitarrays with all bits set."""
          for _ in range(10):
              nbits = randrange(100_000)
              a = ones(nbits)
              nbytes = bits2bytes(nbits)

              size = 2  # head byte and stop byte
              size += bits2bytes(nbits.bit_length())  # size bytes
              size += nbytes  # actual raw bytes
              # number of head bytes, all of block type 0:
              size += bool(nbytes % 32)  # number in 0x01 .. 0x1f
              size += (nbytes // 32 + 127) // 128  # number in 0x20 .. 0xbf
              self.check_blob_length(a, size)

      def round_trip(self, a):
          """Encode a, decode from an iterator, and compare the result."""
          untouched = a.copy()
          stream = iter(sc_encode(a))
          decoded = sc_decode(stream)
          self.assertTrue(a == decoded == untouched)
          self.assertTrue(a.endian == decoded.endian == untouched.endian)
          # the decoder consumed the entire stream
          self.assertEqual(list(stream), [])

      def test_random(self):
          """Round trip random bitarrays while thinning them out."""
          for _ in range(10):
              nbits = randrange(100_000)
              endian = self.random_endian()
              a = ones(nbits, endian)
              # and-ing with random bits clears bits until none are left
              while a.count():
                  a &= urandom(nbits, endian)
                  self.round_trip(a)
  1698. # ---------------------------------------------------------------------------
  class VLFTests(unittest.TestCase, Util):
      """Tests for vl_encode() and vl_decode()."""

      def test_explicit(self):
          """Fixed blobs encode from, and decode to, the given bits."""
          table = [
              (b'\x40', ''),
              (b'\x30', '0'),
              (b'\x38', '1'),
              (b'\x00', '0000'),
              (b'\x01', '0001'),
              (b'\xd3\x20', '001101'),
              (b'\xe0\x40', '0000 1'),
              (b'\x90\x02', '0000 000001'),
              (b'\xb5\xa7\x18', '0101 0100111 0011'),
              (b'\x95\xb7\x1c', '0101 0110111 001110'),
          ]
          for blob, bits in table:
              default_endian = self.random_endian()
              _set_default_endian(default_endian)

              # without specifying the endianness anywhere
              a = bitarray(bits)
              self.assertEqual(vl_encode(a), blob)
              decoded = vl_decode(blob)
              self.assertEqual(decoded, a)
              self.assertEqual(decoded.endian, default_endian)

              # the blob is the same for either endianness
              for endian in 'big', 'little', None:
                  a = bitarray(bits, endian)
                  encoded = vl_encode(a)
                  self.assertEqual(type(encoded), bytes)
                  self.assertEqual(encoded, blob)

                  decoded = vl_decode(blob, endian)
                  self.assertEqual(decoded, a)
                  self.assertEqual(decoded.endian,
                                   endian or default_endian)

      def test_encode_types(self):
          """vl_encode() takes (frozen)bitarrays only and returns bytes."""
          bits = "0011 01"
          for obj in bitarray(bits), frozenbitarray(bits):
              blob = vl_encode(obj)
              self.assertEqual(type(blob), bytes)
              self.assertEqual(blob, b'\xd3\x20')

          for obj in None, [], 0, 123, b'', b'\x00', 3.14:
              self.assertRaises(TypeError, vl_encode, obj)

      def test_decode_types(self):
          """vl_decode() accepts different iterables of byte values."""
          blob = b'\xd3\x20'
          inputs = (blob, iter(blob), memoryview(blob),
                    iter([0xd3, 0x20]), bytearray(blob))
          for stream in inputs:
              decoded = vl_decode(stream, endian=self.random_endian())
              self.assertEqual(type(decoded), bitarray)
              self.assertEqual(decoded, bitarray('0011 01'))

          # these objects are not iterable
          for obj in None, 0, 1, 0.0:
              self.assertRaises(TypeError, vl_decode, obj)

          # these items cannot be interpreted as ints
          for item in None, 2.34, Ellipsis, 'foo':
              self.assertRaises(TypeError, vl_decode, iter([0x95, item]))

      def test_decode_args(self):
          """Invalid items, extra arguments and bad endianness raise."""
          # item not integer
          self.assertRaises(TypeError, vl_decode, iter([b'\x40']))

          self.assertRaises(TypeError, vl_decode, b'\x40', 'big', 3)
          self.assertRaises(ValueError, vl_decode, b'\x40', 'foo')

      def test_decode_trailing(self):
          """Items following the encoded bitarray stay in the iterator."""
          table = [(b'\x40ABC', ''),
                   (b'\xe0\x40A', '00001')]
          for blob, bits in table:
              rest = iter(blob)
              self.assertEqual(vl_decode(rest), bitarray(bits))
              self.assertEqual(next(rest), 65)

      def test_decode_ambiguity(self):
          """Different head bytes may decode to the same bitarray."""
          for blob in b'\x40', b'\x4f', b'\x45':
              self.assertEqual(vl_decode(blob), bitarray())

          for blob in b'\x1e', b'\x1f':
              self.assertEqual(vl_decode(blob), bitarray('111'))

      def test_decode_stream(self):
          """Several encoded bitarrays are decoded from one stream."""
          stream = iter(b'\x40\x30\x38\x40\x2c\xe0\x40\xd3\x20')
          for bits in '', '0', '1', '', '11', '0000 1', '0011 01':
              self.assertEqual(vl_decode(stream), bitarray(bits))

          # concatenate the encodings of many short random bitarrays
          arrays = [urandom(randrange(30)) for _ in range(1000)]
          stream = iter(b''.join(vl_encode(a) for a in arrays))
          for expected in arrays:
              self.assertEqual(vl_decode(stream), expected)

      def test_decode_errors(self):
          """Empty, malformed and truncated input raises."""
          # decode empty bytes
          self.assertRaises(StopIteration, vl_decode, b'')

          # invalid head byte
          invalid_heads = [
              b'\x70', b'\xf0',  # padding = 7
              b'\x50', b'\x60', b'\x70',  # no second byte, but padding > 4
          ]
          for blob in invalid_heads:
              expected = "invalid head byte: 0x%02x" % blob[0]
              self.assertRaisesMessage(ValueError, expected,
                                       vl_decode, blob)

          # high bit set, but no terminating byte
          for blob in b'\x80', b'\x80\x80':
              self.assertRaises(StopIteration, vl_decode, blob)

          # decode list with out of range items
          for item in -1, 256:
              self.assertRaises(ValueError, vl_decode, [item])

          # wrong type
          self.assertRaises(TypeError, vl_decode, [None])

      def test_decode_invalid_stream(self):
          """A failing decode stops right after the offending item."""
          N = 100
          # N groups of three continuation bytes and one invalid item
          stream = iter(N * (3 * [0x80] + ['XX']) + ['end.'])
          for _ in range(N):
              result = None
              try:
                  result = vl_decode(stream)
              except TypeError:
                  pass
              self.assertTrue(result is None)
          self.assertEqual(next(stream), 'end.')

      def test_explicit_zeros(self):
          """All-zero bitarrays which need no padding."""
          for count in range(100):
              a = zeros(4 + count * 7)
              blob = count * b'\x80' + b'\x00'
              self.assertEqual(vl_encode(a), blob)
              self.assertEqual(vl_decode(blob), a)

      def round_trip(self, a):
          """Encode and decode a, and check the size of the encoding."""
          untouched = a.copy()
          blob = vl_encode(a)
          decoded = vl_decode(blob)
          self.check_obj(decoded)
          self.assertTrue(a == decoded == untouched)

          LEN_PAD_BITS = 3
          self.assertEqual(len(blob), (len(a) + LEN_PAD_BITS + 6) // 7)

          # the padding is stored in bits 4..6 of the head byte
          padding = (blob[0] & 0x70) >> 4
          self.assertEqual(len(a) + padding, 7 * len(blob) - LEN_PAD_BITS)

      def test_large(self):
          """Round trip large random bitarrays."""
          for _ in range(10):
              self.round_trip(urandom(randrange(100_000)))

      def test_random(self):
          """Round trip the bitarrays given by randombitarrays()."""
          for a in self.randombitarrays():
              self.round_trip(a)
  1827. # ---------------------------------------------------------------------------
  class IntegerizationTests(unittest.TestCase, Util):
      """Tests for ba2int() and int2ba()."""

      def test_ba2int(self):
          """Basic conversions and argument errors of ba2int()."""
          self.assertEqual(ba2int(bitarray('0')), 0)
          self.assertEqual(ba2int(bitarray('1')), 1)
          self.assertEqual(ba2int(bitarray('00101', 'big')), 5)
          self.assertEqual(ba2int(bitarray('00101', 'little')), 20)
          self.assertEqual(ba2int(frozenbitarray('11')), 3)
          # empty bitarrays cannot be converted
          self.assertRaises(ValueError, ba2int, bitarray())
          self.assertRaises(ValueError, ba2int, frozenbitarray())
          self.assertRaises(TypeError, ba2int, '101')
          a = bitarray('111')
          b = a.copy()
          self.assertEqual(ba2int(a), 7)
          # ensure original object wasn't altered
          self.assertEQUAL(a, b)

      def test_ba2int_frozen(self):
          """ba2int() gives the same result for frozenbitarrays."""
          for a in self.randombitarrays(start=1):
              b = frozenbitarray(a)
              self.assertEqual(ba2int(b), ba2int(a))
              self.assertEQUAL(a, b)

      def test_ba2int_random(self):
          """Big-endian ba2int() agrees with int(..., 2) of the bits."""
          for a in self.randombitarrays(start=1):
              b = bitarray(a, 'big')
              self.assertEqual(a, b)
              self.assertEqual(ba2int(b), int(b.to01(), 2))

      def test_ba2int_bytes(self):
          """Compare ba2int() to an integer assembled byte by byte."""
          for n in range(1, 50):
              a = urandom_2(8 * n)
              c = bytearray(a.tobytes())
              i = 0
              # most significant byte first: the byte order is reversed
              # for little-endian bitarrays
              for x in (c if a.endian == 'big' else reversed(c)):
                  i <<= 8
                  i |= x
              self.assertEqual(ba2int(a), i)

      def test_int2ba(self):
          """Basic conversions and argument errors of int2ba()."""
          self.assertEqual(int2ba(0), bitarray('0'))
          self.assertEqual(int2ba(1), bitarray('1'))
          self.assertEqual(int2ba(5), bitarray('101'))
          self.assertEQUAL(int2ba(6, endian='big'), bitarray('110', 'big'))
          self.assertEQUAL(int2ba(6, endian='little'),
                           bitarray('011', 'little'))
          self.assertRaises(TypeError, int2ba, 1.0)
          self.assertRaises(TypeError, int2ba, 1, 3.0)
          self.assertRaises(ValueError, int2ba, 1, 0)
          self.assertRaises(TypeError, int2ba, 1, 10, 123)
          self.assertRaises(ValueError, int2ba, 1, 10, 'asd')
          # signed integer requires length
          self.assertRaises(TypeError, int2ba, 100, signed=True)

      def test_signed(self):
          """Signed conversions in both directions for fixed values."""
          # bits are given in little-endian order
          for s, i in [
                  ('0', 0),
                  ('1', -1),
                  ('00', 0),
                  ('10', 1),
                  ('01', -2),
                  ('11', -1),
                  ('000', 0),
                  ('100', 1),
                  ('010', 2),
                  ('110', 3),
                  ('001', -4),
                  ('101', -3),
                  ('011', -2),
                  ('111', -1),
                  ('00000', 0),
                  ('11110', 15),
                  ('00001', -16),
                  ('11111', -1),
                  ('00000000 0', 0),
                  ('11111111 0', 255),
                  ('00000000 1', -256),
                  ('11111111 1', -1),
          ]:
              self.assertEqual(ba2int(bitarray(s, 'little'), signed=1), i)
              self.assertEqual(ba2int(bitarray(s[::-1], 'big'), signed=1), i)

              # s may contain a space, so take the length from a bitarray
              len_s = len(bitarray(s))
              self.assertEQUAL(int2ba(i, len_s, 'little', signed=1),
                               bitarray(s, 'little'))
              self.assertEQUAL(int2ba(i, len_s, 'big', signed=1),
                               bitarray(s[::-1], 'big'))

      def test_zero(self):
          """Zero converts to and from all-zero bitarrays."""
          for endian in "little", "big":
              a = int2ba(0, endian=endian)
              self.assertEQUAL(a, bitarray('0', endian=endian))

              for n in range(1, 100):
                  a = int2ba(0, length=n, endian=endian, signed=True)
                  b = bitarray(n * '0', endian)
                  self.assertEQUAL(a, b)
                  for signed in 0, 1:
                      self.assertEqual(ba2int(b, signed=signed), 0)

      def test_negative_one(self):
          """Signed -1 converts to and from all-one bitarrays."""
          for endian in "little", "big":
              for n in range(1, 100):
                  a = int2ba(-1, length=n, endian=endian, signed=True)
                  b = bitarray(n * '1', endian)
                  self.assertEQUAL(a, b)
                  self.assertEqual(ba2int(b, signed=True), -1)

      def test_int2ba_overflow(self):
          """Integers not fitting into the given length raise."""
          self.assertRaises(OverflowError, int2ba, -1)
          self.assertRaises(OverflowError, int2ba, -1, 4)

          self.assertRaises(OverflowError, int2ba, 128, 7)
          self.assertRaises(OverflowError, int2ba, 64, 7, signed=1)
          self.assertRaises(OverflowError, int2ba, -65, 7, signed=1)

          for n in range(1, 20):
              # smallest value too large for n bits (unsigned, signed),
              # and largest value too small for n bits (signed)
              self.assertRaises(OverflowError, int2ba, 1 << n, n)
              self.assertRaises(OverflowError, int2ba, 1 << (n - 1), n,
                                signed=1)
              self.assertRaises(OverflowError, int2ba, -(1 << (n - 1)) - 1, n,
                                signed=1)

      def test_int2ba_length(self):
          """The length argument fixes the size of the result."""
          self.assertRaises(TypeError, int2ba, 0, 1.0)
          self.assertRaises(ValueError, int2ba, 0, 0)
          self.assertEqual(int2ba(5, length=6, endian='big'),
                           bitarray('000101'))
          for n in range(1, 100):
              ab = int2ba(1, n, 'big')
              al = int2ba(1, n, 'little')
              self.assertEqual(ab.endian, 'big')
              self.assertEqual(al.endian, 'little')
              # (the original had a stray trailing comma on this line)
              self.assertEqual(len(ab), n)
              self.assertEqual(len(al), n)
              self.assertEqual(ab, bitarray((n - 1) * '0') + bitarray('1'))
              self.assertEqual(al, bitarray('1') + bitarray((n - 1) * '0'))

              ab = int2ba(0, n, 'big')
              al = int2ba(0, n, 'little')
              self.assertEqual(len(ab), n)
              self.assertEqual(len(al), n)
              self.assertEqual(ab, bitarray(n * '0', 'big'))
              self.assertEqual(al, bitarray(n * '0', 'little'))

              self.assertEqual(int2ba(2 ** n - 1), bitarray(n * '1'))
              self.assertEqual(int2ba(2 ** n - 1, endian='little'),
                               bitarray(n * '1'))

      def test_explicit(self):
          """Fixed integers and their binary representations."""
          _set_default_endian('big')
          for i, sa in [(0, '0'), (1, '1'),
                        (2, '10'), (3, '11'),
                        (25, '11001'), (265, '100001001'),
                        (3691038, '1110000101001000011110')]:
              ab = bitarray(sa, 'big')
              al = bitarray(sa[::-1], 'little')
              self.assertEQUAL(int2ba(i), ab)
              self.assertEQUAL(int2ba(i, endian='big'), ab)
              self.assertEQUAL(int2ba(i, endian='little'), al)
              # The third positional argument of assertEqual() is the
              # failure message, so each value is compared to i separately.
              self.assertEqual(ba2int(ab), i)
              self.assertEqual(ba2int(al), i)

      def check_round_trip(self, i):
          """Check int2ba(i) and ba2int() of it, for both endiannesses."""
          for endian in 'big', 'little':
              a = int2ba(i, endian=endian)
              self.check_obj(a)
              self.assertEqual(a.endian, endian)
              self.assertTrue(len(a) > 0)
              # ensure we have no leading zeros
              if a.endian == 'big':
                  self.assertTrue(len(a) == 1 or a.index(1) == 0)
              self.assertEqual(ba2int(a), i)
              if i > 0:
                  self.assertEqual(i.bit_length(), len(a))
              # add a few trailing / leading zeros to bitarray
              if endian == 'big':
                  a = zeros(randrange(4), endian) + a
              else:
                  a = a + zeros(randrange(4), endian)
              self.assertEqual(a.endian, endian)
              self.assertEqual(ba2int(a), i)

      def test_many(self):
          """Round trip random integers of widely varying magnitude."""
          for _ in range(20):
              self.check_round_trip(randrange(10 ** randint(3, 300)))

      @staticmethod
      def twos_complement(i, num_bits):
          """Return i, read as a num_bits wide two's complement number.

          The bit at position num_bits - 1 contributes its negative
          weight, all other bits of i contribute as usual.
          """
          # https://en.wikipedia.org/wiki/Two%27s_complement
          mask = 2 ** (num_bits - 1)
          return -(i & mask) + (i & ~mask)

      def test_random_signed(self):
          """Signed round trip, compared to the unsigned interpretation."""
          for a in self.randombitarrays(start=1):
              i = ba2int(a, signed=True)
              b = int2ba(i, len(a), a.endian, signed=True)
              self.assertEQUAL(a, b)

              j = ba2int(a, signed=False)  # unsigned
              if i >= 0:
                  self.assertEqual(i, j)

              self.assertEqual(i, self.twos_complement(j, len(a)))
  2008. # ---------------------------------------------------------------------------
  class MixedTests(unittest.TestCase, Util):
      """Compare bitarray conversions and operations to Python integers."""

      def test_bin(self):
          """The digits of bin() make up a big-endian bitarray."""
          for _ in range(20):
              value = randrange(1000)
              text = bin(value)
              self.assertEqual(text[:2], '0b')
              digits = text[2:]

              a = bitarray(digits, 'big')
              self.assertEqual(ba2int(a), value)
              back = a.to01()
              self.assertEqual(back, digits)
              self.assertEqual(int(back, 2), value)

      def test_oct(self):
          """The digits of oct() convert with base 8."""
          for _ in range(20):
              value = randrange(1000)
              text = oct(value)
              self.assertEqual(text[:2], '0o')
              digits = text[2:]

              a = base2ba(8, digits, 'big')
              self.assertEqual(ba2int(a), value)
              back = ba2base(8, a)
              self.assertEqual(back, digits)
              self.assertEqual(int(back, 8), value)

      def test_hex(self):
          """The digits of hex() convert with hex2ba() and ba2hex()."""
          for _ in range(20):
              value = randrange(1000)
              text = hex(value)
              self.assertEqual(text[:2], '0x')
              digits = text[2:]

              a = hex2ba(digits, 'big')
              self.assertEqual(ba2int(a), value)
              back = ba2hex(a)
              self.assertEqual(back, digits)
              self.assertEqual(int(back, 16), value)

      def test_bitwise(self):
          """Bitwise operators act like the ones of Python integers."""
          for a in self.randombitarrays(start=1):
              b = urandom(len(a), a.endian)
              a_before = a.copy()
              b_before = b.copy()
              i = ba2int(a)
              j = ba2int(b)

              self.assertEqual(ba2int(a & b), i & j)
              self.assertEqual(ba2int(a | b), i | j)
              self.assertEqual(ba2int(a ^ b), i ^ j)

              shift = randint(0, len(a))
              if a.endian == 'big':
                  self.assertEqual(ba2int(a >> shift), i >> shift)
                  # prepend zeros, such that no bits get shifted out
                  widened = zeros(len(a), 'big') + a
                  self.assertEqual(ba2int(widened << shift), i << shift)

              # the operands are unchanged
              self.assertEQUAL(a, a_before)
              self.assertEQUAL(b, b_before)

      def test_bitwise_inplace(self):
          """In-place operators act like the ones of Python integers."""
          for a in self.randombitarrays(start=1):
              b = urandom(len(a), a.endian)
              b_before = b.copy()
              i = ba2int(a)
              j = ba2int(b)

              work = a.copy()
              work &= b
              self.assertEqual(ba2int(work), i & j)

              work = a.copy()
              work |= b
              self.assertEqual(ba2int(work), i | j)

              work = a.copy()
              work ^= b
              self.assertEqual(ba2int(work), i ^ j)

              # the right hand operand is unchanged
              self.assertEQUAL(b, b_before)

              shift = randint(0, len(a))
              if a.endian == 'big':
                  work = a.copy()
                  work >>= shift
                  self.assertEqual(ba2int(work), i >> shift)

                  # prepend zeros, such that no bits get shifted out
                  work = zeros(len(a), 'big') + a
                  work <<= shift
                  self.assertEqual(ba2int(work), i << shift)
  2081. # ---------------------- serialize() deserialize() -----------------------
  class SerializationTests(unittest.TestCase, Util):
      """Tests for serialize() and deserialize()."""

      def test_explicit(self):
          """Fixed blobs and the bitarrays they represent."""
          table = [
              (b'\x00', 'little', ''),
              (b'\x07\x01', 'little', '1'),
              (b'\x17\x80', 'big', '1'),
              (b'\x13\xf8', 'big', '11111'),
              (b'\x00\x0f', 'little', '11110000'),
              (b'\x10\xf0', 'big', '11110000'),
              (b'\x12\x87\xd8', 'big', '10000111 110110'),
          ]
          for blob, endian, bits in table:
              a = bitarray(bits, endian)

              encoded = serialize(a)
              self.assertEqual(blob, encoded)
              self.assertEqual(type(encoded), bytes)

              decoded = deserialize(blob)
              self.assertEqual(decoded, a)
              self.assertEqual(decoded.endian, endian)
              self.assertEqual(type(decoded), bitarray)

      def test_serialize_args(self):
          """serialize() takes exactly one (frozen)bitarray."""
          for obj in '0', 0, 1, b'\x00', 0.0, [0, 1], bytearray([0]):
              self.assertRaises(TypeError, serialize, obj)
          # no arguments
          self.assertRaises(TypeError, serialize)
          # too many arguments
          self.assertRaises(TypeError, serialize, bitarray(), 1)

          for a in bitarray('0111', 'big'), frozenbitarray('0111', 'big'):
              self.assertEqual(serialize(a), b'\x14\x70')

      def test_deserialize_args(self):
          """deserialize() takes exactly one bytes-like object."""
          for obj in 0, 1, False, True, None, '', '01', 0.0, [0, 1]:
              self.assertRaises(TypeError, deserialize, obj)
          # no arguments
          self.assertRaises(TypeError, deserialize)
          # too many arguments
          self.assertRaises(TypeError, deserialize, b'\x00', 1)

          blob = b'\x03\x06'
          as_bitarray = bitarray(blob)
          inputs = (blob, bytearray(blob), memoryview(blob), as_bitarray)
          for buf in inputs:
              a = deserialize(buf)
              self.assertEqual(a.to01(), '01100')
              self.assertEqual(a.endian, 'little')

      def test_invalid_bytes(self):
          """Check which header bytes are accepted, and which are not."""
          self.assertRaises(ValueError, deserialize, b'')

          def expect_header_error(buf):
              # the message reports the first byte of the buffer
              expected = "invalid header byte: 0x%02x" % buf[0]
              self.assertRaisesMessage(ValueError, expected,
                                       deserialize, buf)

          for head in range(256):
              # the header byte only
              buf = bytearray([head])
              if head in (0, 16):
                  self.assertEqual(deserialize(buf), bitarray())
              else:
                  self.assertRaises(ValueError, deserialize, buf)
                  expect_header_error(buf)

              # the header byte followed by a single zero byte
              buf.append(0)
              if head < 32 and head % 16 < 8:
                  self.assertEqual(deserialize(buf), zeros(8 - head % 8))
              else:
                  self.assertRaises(ValueError, deserialize, buf)
                  expect_header_error(buf)

      def test_padbits_ignored(self):
          """The values of pad bits do not affect the result."""
          table = [
              (b'\x07\x01', 'little'),
              (b'\x07\x03', 'little'),
              (b'\x07\xff', 'little'),
              (b'\x17\x80', 'big'),
              (b'\x17\xc0', 'big'),
              (b'\x17\xff', 'big'),
          ]
          for blob, endian in table:
              a = deserialize(blob)
              self.assertEqual(a.to01(), '1')
              self.assertEqual(a.endian, endian)

      def test_random(self):
          """Round trip the bitarrays given by randombitarrays()."""
          for original in self.randombitarrays():
              blob = serialize(original)
              restored = deserialize(blob)
              self.assertEqual(original, restored)
              self.assertEqual(original.endian, restored.endian)
              self.check_obj(restored)
  2160. # ---------------------------------------------------------------------------
  class HuffmanTreeTests(unittest.TestCase):  # tests for _huffman_tree()
      """Tests for the tree returned by _huffman_tree()."""

      def test_empty(self):
          """An empty frequency map raises IndexError."""
          self.assertRaises(IndexError, _huffman_tree, {})

      def test_one_symbol(self):
          """For a single symbol, the root carries the symbol."""
          root = _huffman_tree({"A": 1})
          self.assertEqual(root.symbol, "A")
          self.assertEqual(root.freq, 1)
          # the root has no child attribute
          self.assertRaises(AttributeError, getattr, root, 'child')

      def test_two_symbols(self):
          """For two symbols, the root has one child for each symbol."""
          root = _huffman_tree({"A": 1, "B": 1})
          # the root has no symbol attribute
          self.assertRaises(AttributeError, getattr, root, 'symbol')
          self.assertEqual(root.freq, 2)

          for index, symbol in enumerate("AB"):
              node = root.child[index]
              self.assertEqual(node.symbol, symbol)
              self.assertEqual(node.freq, 1)
  2180. class HuffmanTests(unittest.TestCase):
  2181. def test_simple(self):
  2182. freq = {0: 10, 'as': 2, None: 1.6}
  2183. code = huffman_code(freq)
  2184. self.assertEqual(len(code), 3)
  2185. self.assertEqual(len(code[0]), 1)
  2186. self.assertEqual(len(code['as']), 2)
  2187. self.assertEqual(len(code[None]), 2)
  2188. def test_endianness(self):
  2189. freq = {'A': 10, 'B': 2, 'C': 5}
  2190. for endian in 'big', 'little':
  2191. code = huffman_code(freq, endian)
  2192. self.assertEqual(len(code), 3)
  2193. for v in code.values():
  2194. self.assertEqual(v.endian, endian)
  2195. def test_wrong_arg(self):
  2196. self.assertRaises(TypeError, huffman_code, [('a', 1)])
  2197. self.assertRaises(TypeError, huffman_code, 123)
  2198. self.assertRaises(TypeError, huffman_code, None)
  2199. # cannot compare 'a' with 1
  2200. self.assertRaises(TypeError, huffman_code, {'A': 'a', 'B': 1})
  2201. # frequency map cannot be empty
  2202. self.assertRaises(ValueError, huffman_code, {})
  2203. def test_one_symbol(self):
  2204. cnt = {'a': 1}
  2205. code = huffman_code(cnt)
  2206. self.assertEqual(code, {'a': bitarray('0')})
  2207. for n in range(4):
  2208. msg = n * ['a']
  2209. a = bitarray()
  2210. a.encode(code, msg)
  2211. self.assertEqual(a.to01(), n * '0')
  2212. self.assertEqual(list(a.decode(code)), msg)
  2213. a.append(1)
  2214. self.assertRaises(ValueError, list, a.decode(code))
  2215. def check_tree(self, code):
  2216. n = len(code)
  2217. tree = decodetree(code)
  2218. self.assertEqual(tree.todict(), code)
  2219. # ensure tree has 2n-1 nodes (n symbol nodes and n-1 internal nodes)
  2220. self.assertEqual(tree.nodes(), 2 * n - 1)
  2221. # a proper Huffman tree is complete
  2222. self.assertTrue(tree.complete())
  2223. def test_balanced(self):
  2224. n = 6
  2225. freq = {}
  2226. for i in range(1 << n):
  2227. freq[i] = 1
  2228. code = huffman_code(freq)
  2229. self.assertEqual(len(code), 1 << n)
  2230. self.assertTrue(all(len(v) == n for v in code.values()))
  2231. self.check_tree(code)
  2232. def test_unbalanced(self):
  2233. n = 27
  2234. freq = {}
  2235. for i in range(n):
  2236. freq[i] = 1 << i
  2237. code = huffman_code(freq)
  2238. self.assertEqual(len(code), n)
  2239. for i in range(n):
  2240. self.assertEqual(len(code[i]), n - max(1, i))
  2241. self.check_tree(code)
  2242. def test_counter(self):
  2243. message = 'the quick brown fox jumps over the lazy dog.'
  2244. code = huffman_code(Counter(message))
  2245. a = bitarray()
  2246. a.encode(code, message)
  2247. self.assertEqual(''.join(a.decode(code)), message)
  2248. self.check_tree(code)
  2249. def test_random_list(self):
  2250. plain = choices(range(100), k=500)
  2251. code = huffman_code(Counter(plain))
  2252. a = bitarray()
  2253. a.encode(code, plain)
  2254. self.assertEqual(list(a.decode(code)), plain)
  2255. self.check_tree(code)
  2256. def test_random_freq(self):
  2257. for n in 2, 3, 4, randint(5, 200):
  2258. # create Huffman code for n symbols
  2259. code = huffman_code({i: random() for i in range(n)})
  2260. self.check_tree(code)
  2261. # ---------------------------------------------------------------------------
  2262. class CanonicalHuffmanTests(unittest.TestCase, Util):
  2263. def test_basic(self):
  2264. plain = bytearray(b'the quick brown fox jumps over the lazy dog.')
  2265. chc, count, symbol = canonical_huffman(Counter(plain))
  2266. self.assertEqual(type(chc), dict)
  2267. self.assertEqual(type(count), list)
  2268. self.assertEqual(type(symbol), list)
  2269. a = bitarray()
  2270. a.encode(chc, plain)
  2271. self.assertEqual(bytearray(a.decode(chc)), plain)
  2272. self.assertEqual(bytearray(canonical_decode(a, count, symbol)), plain)
  2273. def test_example(self):
  2274. cnt = {'a': 5, 'b': 3, 'c': 1, 'd': 1, 'r': 2}
  2275. codedict, count, symbol = canonical_huffman(cnt)
  2276. self.assertEqual(codedict, {'a': bitarray('0'),
  2277. 'b': bitarray('10'),
  2278. 'c': bitarray('1110'),
  2279. 'd': bitarray('1111'),
  2280. 'r': bitarray('110')})
  2281. self.assertEqual(count, [0, 1, 1, 1, 2])
  2282. self.assertEqual(symbol, ['a', 'b', 'r', 'c', 'd'])
  2283. a = bitarray('01011001110011110101100')
  2284. msg = "abracadabra"
  2285. self.assertEqual(''.join(a.decode(codedict)), msg)
  2286. self.assertEqual(''.join(canonical_decode(a, count, symbol)), msg)
  2287. def test_canonical_huffman_errors(self):
  2288. self.assertRaises(TypeError, canonical_huffman, [])
  2289. # frequency map cannot be empty
  2290. self.assertRaises(ValueError, canonical_huffman, {})
  2291. self.assertRaises(TypeError, canonical_huffman)
  2292. cnt = huffman_code(Counter('aabc'))
  2293. self.assertRaises(TypeError, canonical_huffman, cnt, 'a')
  2294. def test_one_symbol(self):
  2295. cnt = {'a': 1}
  2296. chc, count, symbol = canonical_huffman(cnt)
  2297. self.assertEqual(chc, {'a': bitarray('0')})
  2298. self.assertEqual(count, [0, 1])
  2299. self.assertEqual(symbol, ['a'])
  2300. for n in range(4):
  2301. msg = n * ['a']
  2302. a = bitarray()
  2303. a.encode(chc, msg)
  2304. self.assertEqual(a.to01(), n * '0')
  2305. self.assertEqual(list(canonical_decode(a, count, symbol)), msg)
  2306. a.append(1)
  2307. self.assertRaises(ValueError, list,
  2308. canonical_decode(a, count, symbol))
  2309. def test_canonical_decode_errors(self):
  2310. a = bitarray('1101')
  2311. s = ['a']
  2312. # bitarray not of bitarray type
  2313. self.assertRaises(TypeError, canonical_decode, '11', [0, 1], s)
  2314. # count not sequence
  2315. self.assertRaises(TypeError, canonical_decode, a, {0, 1}, s)
  2316. # count element not an int
  2317. self.assertRaises(TypeError, canonical_decode, a, [0, 1.0], s)
  2318. # count element overflow
  2319. self.assertRaises(OverflowError, canonical_decode, a, [0, 1 << 65], s)
  2320. # symbol not sequence
  2321. self.assertRaises(TypeError, canonical_decode, a, [0, 1], 43)
  2322. symbol = ['a', 'b', 'c', 'd']
  2323. # sum(count) != len(symbol)
  2324. self.assertRaisesMessage(ValueError,
  2325. "sum(count) = 3, but len(symbol) = 4",
  2326. canonical_decode, a, [0, 1, 2], symbol)
  2327. # count list too long
  2328. self.assertRaisesMessage(ValueError,
  2329. "len(count) cannot be larger than 32",
  2330. canonical_decode, a, 33 * [0], symbol)
  2331. def test_canonical_decode_count_range(self):
  2332. a = bitarray()
  2333. for i in range(1, 32):
  2334. count = 32 * [0]
  2335. # negative count
  2336. count[i] = -1
  2337. self.assertRaisesMessage(ValueError,
  2338. "count[%d] not in [0..%d], got -1" % (i, 1 << i),
  2339. canonical_decode, a, count, [])
  2340. maxbits = 1 << i
  2341. count[i] = maxbits
  2342. if i == 31 and PTRSIZE == 4:
  2343. self.assertRaises(OverflowError,
  2344. canonical_decode, a, count, [])
  2345. continue
  2346. self.assertRaisesMessage(ValueError,
  2347. "sum(count) = %d, but len(symbol) = 0" % maxbits,
  2348. canonical_decode, a, count, [])
  2349. count[i] = maxbits + 1
  2350. self.assertRaisesMessage(ValueError,
  2351. "count[%d] not in [0..%d], got %d" % (i, maxbits, count[i]),
  2352. canonical_decode, a, count, [])
  2353. iter = canonical_decode(a, 32 * [0], [])
  2354. self.assertEqual(list(iter), [])
  2355. def test_canonical_decode_simple(self):
  2356. # symbols can be anything, they do not even have to be hashable here
  2357. cnt = [0, 0, 4]
  2358. s = ['A', 42, [1.2-3.7j, 4j], {'B': 6}]
  2359. a = bitarray('00 01 10 11')
  2360. # count can be a list
  2361. self.assertEqual(list(canonical_decode(a, cnt, s)), s)
  2362. # count can also be a tuple (any sequence object in fact)
  2363. self.assertEqual(list(canonical_decode(a, (0, 0, 4), s)), s)
  2364. self.assertEqual(list(canonical_decode(7 * a, cnt, s)), 7 * s)
  2365. # the count list may have extra 0's at the end (but not too many)
  2366. count = [0, 0, 4, 0, 0, 0, 0, 0]
  2367. self.assertEqual(list(canonical_decode(a, count, s)), s)
  2368. # the element count[0] is unused
  2369. self.assertEqual(list(canonical_decode(a, [-47, 0, 4], s)), s)
  2370. # in fact it can be anything, as it is entirely ignored
  2371. self.assertEqual(list(canonical_decode(a, [None, 0, 4], s)), s)
  2372. # the symbol argument can be any sequence object
  2373. s = [65, 66, 67, 98]
  2374. self.assertEqual(list(canonical_decode(a, cnt, s)), s)
  2375. self.assertEqual(list(canonical_decode(a, cnt, bytearray(s))), s)
  2376. self.assertEqual(list(canonical_decode(a, cnt, tuple(s))), s)
  2377. self.assertEqual(list(canonical_decode(a, cnt, bytes(s))), s)
  2378. # Implementation Note:
  2379. # The symbol can even be an iterable. This was done because we
  2380. # want to use PySequence_Fast in order to convert sequence
  2381. # objects (like bytes and bytearray) to a list. This is faster
  2382. # as all objects are now elements in an array of pointers (as
  2383. # opposed to having the object's __getitem__ method called on
  2384. # every iteration).
  2385. self.assertEqual(list(canonical_decode(a, cnt, iter(s))), s)
  2386. def test_canonical_decode_empty(self):
  2387. a = bitarray()
  2388. # count and symbol are empty, ok because sum([]) == len([])
  2389. self.assertEqual(list(canonical_decode(a, [], [])), [])
  2390. a.append(0)
  2391. self.assertRaisesMessage(ValueError, "reached end of bitarray",
  2392. list, canonical_decode(a, [], []))
  2393. a = bitarray(31 * '0')
  2394. self.assertRaisesMessage(ValueError, "ran out of codes",
  2395. list, canonical_decode(a, [], []))
  2396. def test_canonical_decode_one_symbol(self):
  2397. symbols = ['A']
  2398. count = [0, 1]
  2399. a = bitarray('000')
  2400. self.assertEqual(list(canonical_decode(a, count, symbols)),
  2401. 3 * symbols)
  2402. a.append(1)
  2403. a.extend(bitarray(10 * '0'))
  2404. iterator = canonical_decode(a, count, symbols)
  2405. self.assertRaisesMessage(ValueError, "reached end of bitarray",
  2406. list, iterator)
  2407. a.extend(bitarray(20 * '0'))
  2408. iterator = canonical_decode(a, count, symbols)
  2409. self.assertRaisesMessage(ValueError, "ran out of codes",
  2410. list, iterator)
  2411. def test_canonical_decode_large(self):
  2412. with open(__file__, 'rb') as f:
  2413. msg = bytearray(f.read())
  2414. self.assertTrue(len(msg) > 50000)
  2415. codedict, count, symbol = canonical_huffman(Counter(msg))
  2416. a = bitarray()
  2417. a.encode(codedict, msg)
  2418. self.assertEqual(bytearray(canonical_decode(a, count, symbol)), msg)
  2419. self.check_code(codedict, count, symbol)
  2420. def test_canonical_decode_symbol_change(self):
  2421. msg = bytearray(b"Hello World!")
  2422. codedict, count, symbol = canonical_huffman(Counter(msg))
  2423. self.check_code(codedict, count, symbol)
  2424. a = bitarray()
  2425. a.encode(codedict, 10 * msg)
  2426. it = canonical_decode(a, count, symbol)
  2427. def decode_one_msg():
  2428. return bytearray(next(it) for _ in range(len(msg)))
  2429. self.assertEqual(decode_one_msg(), msg)
  2430. symbol[symbol.index(ord("l"))] = ord("k")
  2431. self.assertEqual(decode_one_msg(), bytearray(b"Hekko Workd!"))
  2432. del symbol[:]
  2433. self.assertRaises(IndexError, decode_one_msg)
  2434. def ensure_sorted(self, chc, symbol):
  2435. # ensure codes are sorted
  2436. for i in range(len(symbol) - 1):
  2437. a = chc[symbol[i]]
  2438. b = chc[symbol[i + 1]]
  2439. self.assertTrue(ba2int(a) < ba2int(b))
  2440. def ensure_consecutive(self, chc, count, symbol):
  2441. start = 0
  2442. for nbits, cnt in enumerate(count):
  2443. for i in range(start, start + cnt - 1):
  2444. # ensure two consecutive codes (with same bit length) have
  2445. # consecutive integer values
  2446. a = chc[symbol[i]]
  2447. b = chc[symbol[i + 1]]
  2448. self.assertTrue(len(a) == len(b) == nbits)
  2449. self.assertEqual(ba2int(a) + 1, ba2int(b))
  2450. start += cnt
  2451. def ensure_count(self, chc, count):
  2452. # ensure count list corresponds to length counts from codedict
  2453. maxbits = len(count) - 1
  2454. self.assertEqual(maxbits, max(len(a) for a in chc.values()))
  2455. my_count = (maxbits + 1) * [0]
  2456. for a in chc.values():
  2457. self.assertEqual(a.endian, 'big')
  2458. my_count[len(a)] += 1
  2459. self.assertEqual(my_count, count)
  2460. def ensure_complete(self, count):
  2461. # ensure code is complete and not oversubscribed
  2462. len_c = len(count)
  2463. x = sum(count[i] << (len_c - i) for i in range(1, len_c))
  2464. self.assertEqual(x, 1 << len_c)
  2465. def ensure_complete_2(self, chc):
  2466. # ensure code is complete
  2467. dt = decodetree(chc)
  2468. self.assertTrue(dt.complete())
  2469. def ensure_round_trip(self, chc, count, symbol):
  2470. # create a short test message, encode and decode
  2471. msg = choices(symbol, k=10)
  2472. a = bitarray()
  2473. a.encode(chc, msg)
  2474. it = canonical_decode(a, count, symbol)
  2475. # the iterator holds a reference to the bitarray and symbol list
  2476. del a, count, symbol
  2477. self.assertEqual(type(it).__name__, 'canonical_decodeiter')
  2478. self.assertEqual(list(it), msg)
  2479. def check_code(self, chc, count, symbol):
  2480. self.assertTrue(len(chc) == len(symbol) == sum(count))
  2481. self.assertEqual(count[0], 0) # no codes have length 0
  2482. self.assertTrue(set(chc) == set(symbol))
  2483. # the code of the last symbol has all 1 bits
  2484. self.assertTrue(chc[symbol[-1]].all())
  2485. # the code of the first symbol starts with bit 0
  2486. self.assertFalse(chc[symbol[0]][0])
  2487. self.ensure_sorted(chc, symbol)
  2488. self.ensure_consecutive(chc, count, symbol)
  2489. self.ensure_count(chc, count)
  2490. self.ensure_complete(count)
  2491. self.ensure_complete_2(chc)
  2492. self.ensure_round_trip(chc, count, symbol)
  2493. def test_simple_counter(self):
  2494. plain = bytearray(b'the quick brown fox jumps over the lazy dog.')
  2495. cnt = Counter(plain)
  2496. self.check_code(*canonical_huffman(cnt))
  2497. def test_no_comp(self):
  2498. freq = {None: 1, "A": 1} # None and "A" are not comparable
  2499. self.check_code(*canonical_huffman(freq))
  2500. def test_balanced(self):
  2501. n = 7
  2502. freq = {}
  2503. for i in range(1 << n):
  2504. freq[i] = 1
  2505. code, count, sym = canonical_huffman(freq)
  2506. self.assertEqual(len(code), 1 << n)
  2507. self.assertTrue(all(len(v) == n for v in code.values()))
  2508. self.check_code(code, count, sym)
  2509. def test_unbalanced(self):
  2510. n = 32
  2511. freq = {}
  2512. for i in range(n):
  2513. freq[i] = 1 << i
  2514. code = canonical_huffman(freq)[0]
  2515. self.assertEqual(len(code), n)
  2516. for i in range(n):
  2517. self.assertEqual(len(code[i]), n - max(1, i))
  2518. self.check_code(*canonical_huffman(freq))
  2519. def test_random_freq(self):
  2520. for n in 2, 3, 4, randint(5, 200):
  2521. freq = {i: random() for i in range(n)}
  2522. self.check_code(*canonical_huffman(freq))
  2523. # ---------------------------------------------------------------------------
# Run the tests defined in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()