dicttoolz.pyx 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575
  1. from cpython.dict cimport (PyDict_Check, PyDict_CheckExact, PyDict_GetItem,
  2. PyDict_New, PyDict_Next,
  3. PyDict_SetItem, PyDict_Update, PyDict_DelItem)
  4. from cpython.list cimport PyList_Append, PyList_New
  5. from cpython.object cimport PyObject_SetItem
  6. from cpython.ref cimport PyObject, Py_DECREF, Py_INCREF, Py_XDECREF
  7. # Locally defined bindings that differ from `cython.cpython` bindings
  8. from cytoolz.cpython cimport PyDict_Next_Compat, PtrIter_Next
  9. from copy import copy
  10. from collections.abc import Mapping
  11. __all__ = ['merge', 'merge_with', 'valmap', 'keymap', 'itemmap', 'valfilter',
  12. 'keyfilter', 'itemfilter', 'assoc', 'dissoc', 'assoc_in', 'get_in',
  13. 'update_in']
  14. cdef class _iter_mapping:
  15. """ Keep a handle on the current item to prevent memory clean up too early"""
  16. def __cinit__(self, object it):
  17. self.it = it
  18. self.cur = None
  19. def __iter__(self):
  20. return self
  21. def __next__(self):
  22. self.cur = next(self.it)
  23. return self.cur
  24. cdef int PyMapping_Next(object p, Py_ssize_t *ppos, PyObject* *pkey, PyObject* *pval) except -1:
  25. """Mimic "PyDict_Next" interface, but for any mapping"""
  26. cdef PyObject *obj
  27. obj = PtrIter_Next(p)
  28. if obj is NULL:
  29. return 0
  30. pkey[0] = <PyObject*>(<object>obj)[0]
  31. pval[0] = <PyObject*>(<object>obj)[1]
  32. Py_XDECREF(obj) # removing this results in memory leak
  33. return 1
  34. cdef f_map_next get_map_iter(object d, PyObject* *ptr) except NULL:
  35. """Return function pointer to perform iteration over object returned in ptr.
  36. The returned function signature matches "PyDict_Next". If ``d`` is a dict,
  37. then the returned function *is* PyDict_Next, so iteration wil be very fast.
  38. The object returned through ``ptr`` needs to have its reference count
  39. reduced by one once the caller "owns" the object.
  40. This function lets us control exactly how iteration should be performed
  41. over a given mapping. The current rules are:
  42. 1) If ``d`` is exactly a dict, use PyDict_Next
  43. 2) If ``d`` is subtype of dict, use PyMapping_Next. This lets the user
  44. control the order iteration, such as for ordereddict.
  45. 3) If using PyMapping_Next, iterate using ``iteritems`` if possible,
  46. otherwise iterate using ``items``.
  47. """
  48. cdef object val
  49. cdef f_map_next rv
  50. if PyDict_CheckExact(d):
  51. val = d
  52. rv = &PyDict_Next_Compat
  53. else:
  54. val = _iter_mapping(iter(d.items()))
  55. rv = &PyMapping_Next
  56. Py_INCREF(val)
  57. ptr[0] = <PyObject*>val
  58. return rv
  59. cdef get_factory(name, kwargs):
  60. factory = kwargs.pop('factory', dict)
  61. if kwargs:
  62. raise TypeError("{0}() got an unexpected keyword argument "
  63. "'{1}'".format(name, kwargs.popitem()[0]))
  64. return factory
  65. cdef object c_merge(object dicts, object factory=dict):
  66. cdef object rv
  67. rv = factory()
  68. if PyDict_CheckExact(rv):
  69. for d in dicts:
  70. PyDict_Update(rv, d)
  71. else:
  72. for d in dicts:
  73. rv.update(d)
  74. return rv
  75. def merge(*dicts, **kwargs):
  76. """
  77. Merge a collection of dictionaries
  78. >>> merge({1: 'one'}, {2: 'two'})
  79. {1: 'one', 2: 'two'}
  80. Later dictionaries have precedence
  81. >>> merge({1: 2, 3: 4}, {3: 3, 4: 4})
  82. {1: 2, 3: 3, 4: 4}
  83. See Also:
  84. merge_with
  85. """
  86. if len(dicts) == 1 and not isinstance(dicts[0], Mapping):
  87. dicts = dicts[0]
  88. factory = get_factory('merge', kwargs)
  89. return c_merge(dicts, factory)
  90. cdef object c_merge_with(object func, object dicts, object factory=dict):
  91. cdef:
  92. dict result
  93. object rv, d
  94. list seq
  95. f_map_next f
  96. PyObject *obj
  97. PyObject *pkey
  98. PyObject *pval
  99. Py_ssize_t pos
  100. result = PyDict_New()
  101. rv = factory()
  102. for d in dicts:
  103. f = get_map_iter(d, &obj)
  104. d = <object>obj
  105. Py_DECREF(d)
  106. pos = 0
  107. while f(d, &pos, &pkey, &pval):
  108. obj = PyDict_GetItem(result, <object>pkey)
  109. if obj is NULL:
  110. seq = PyList_New(0)
  111. PyList_Append(seq, <object>pval)
  112. PyDict_SetItem(result, <object>pkey, seq)
  113. else:
  114. PyList_Append(<object>obj, <object>pval)
  115. f = get_map_iter(result, &obj)
  116. d = <object>obj
  117. Py_DECREF(d)
  118. pos = 0
  119. while f(d, &pos, &pkey, &pval):
  120. PyObject_SetItem(rv, <object>pkey, func(<object>pval))
  121. return rv
  122. def merge_with(func, *dicts, **kwargs):
  123. """
  124. Merge dictionaries and apply function to combined values
  125. A key may occur in more than one dict, and all values mapped from the key
  126. will be passed to the function as a list, such as func([val1, val2, ...]).
  127. >>> merge_with(sum, {1: 1, 2: 2}, {1: 10, 2: 20})
  128. {1: 11, 2: 22}
  129. >>> merge_with(first, {1: 1, 2: 2}, {2: 20, 3: 30}) # doctest: +SKIP
  130. {1: 1, 2: 2, 3: 30}
  131. See Also:
  132. merge
  133. """
  134. if len(dicts) == 1 and not isinstance(dicts[0], Mapping):
  135. dicts = dicts[0]
  136. factory = get_factory('merge_with', kwargs)
  137. return c_merge_with(func, dicts, factory)
  138. cpdef object valmap(object func, object d, object factory=dict):
  139. """
  140. Apply function to values of dictionary
  141. >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
  142. >>> valmap(sum, bills) # doctest: +SKIP
  143. {'Alice': 65, 'Bob': 45}
  144. See Also:
  145. keymap
  146. itemmap
  147. """
  148. cdef:
  149. object rv
  150. f_map_next f
  151. PyObject *obj
  152. PyObject *pkey
  153. PyObject *pval
  154. Py_ssize_t pos = 0
  155. rv = factory()
  156. f = get_map_iter(d, &obj)
  157. d = <object>obj
  158. Py_DECREF(d)
  159. while f(d, &pos, &pkey, &pval):
  160. rv[<object>pkey] = func(<object>pval)
  161. return rv
  162. cpdef object keymap(object func, object d, object factory=dict):
  163. """
  164. Apply function to keys of dictionary
  165. >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
  166. >>> keymap(str.lower, bills) # doctest: +SKIP
  167. {'alice': [20, 15, 30], 'bob': [10, 35]}
  168. See Also:
  169. valmap
  170. itemmap
  171. """
  172. cdef:
  173. object rv
  174. f_map_next f
  175. PyObject *obj
  176. PyObject *pkey
  177. PyObject *pval
  178. Py_ssize_t pos = 0
  179. rv = factory()
  180. f = get_map_iter(d, &obj)
  181. d = <object>obj
  182. Py_DECREF(d)
  183. while f(d, &pos, &pkey, &pval):
  184. rv[func(<object>pkey)] = <object>pval
  185. return rv
  186. cpdef object itemmap(object func, object d, object factory=dict):
  187. """
  188. Apply function to items of dictionary
  189. >>> accountids = {"Alice": 10, "Bob": 20}
  190. >>> itemmap(reversed, accountids) # doctest: +SKIP
  191. {10: "Alice", 20: "Bob"}
  192. See Also:
  193. keymap
  194. valmap
  195. """
  196. cdef:
  197. object rv, k, v
  198. f_map_next f
  199. PyObject *obj
  200. PyObject *pkey
  201. PyObject *pval
  202. Py_ssize_t pos = 0
  203. rv = factory()
  204. f = get_map_iter(d, &obj)
  205. d = <object>obj
  206. Py_DECREF(d)
  207. while f(d, &pos, &pkey, &pval):
  208. k, v = func((<object>pkey, <object>pval))
  209. rv[k] = v
  210. return rv
  211. cpdef object valfilter(object predicate, object d, object factory=dict):
  212. """
  213. Filter items in dictionary by value
  214. >>> iseven = lambda x: x % 2 == 0
  215. >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
  216. >>> valfilter(iseven, d)
  217. {1: 2, 3: 4}
  218. See Also:
  219. keyfilter
  220. itemfilter
  221. valmap
  222. """
  223. cdef:
  224. object rv
  225. f_map_next f
  226. PyObject *obj
  227. PyObject *pkey
  228. PyObject *pval
  229. Py_ssize_t pos = 0
  230. rv = factory()
  231. f = get_map_iter(d, &obj)
  232. d = <object>obj
  233. Py_DECREF(d)
  234. while f(d, &pos, &pkey, &pval):
  235. if predicate(<object>pval):
  236. rv[<object>pkey] = <object>pval
  237. return rv
  238. cpdef object keyfilter(object predicate, object d, object factory=dict):
  239. """
  240. Filter items in dictionary by key
  241. >>> iseven = lambda x: x % 2 == 0
  242. >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
  243. >>> keyfilter(iseven, d)
  244. {2: 3, 4: 5}
  245. See Also:
  246. valfilter
  247. itemfilter
  248. keymap
  249. """
  250. cdef:
  251. object rv
  252. f_map_next f
  253. PyObject *obj
  254. PyObject *pkey
  255. PyObject *pval
  256. Py_ssize_t pos = 0
  257. rv = factory()
  258. f = get_map_iter(d, &obj)
  259. d = <object>obj
  260. Py_DECREF(d)
  261. while f(d, &pos, &pkey, &pval):
  262. if predicate(<object>pkey):
  263. rv[<object>pkey] = <object>pval
  264. return rv
  265. cpdef object itemfilter(object predicate, object d, object factory=dict):
  266. """
  267. Filter items in dictionary by item
  268. >>> def isvalid(item):
  269. ... k, v = item
  270. ... return k % 2 == 0 and v < 4
  271. >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
  272. >>> itemfilter(isvalid, d)
  273. {2: 3}
  274. See Also:
  275. keyfilter
  276. valfilter
  277. itemmap
  278. """
  279. cdef:
  280. object rv, k, v
  281. f_map_next f
  282. PyObject *obj
  283. PyObject *pkey
  284. PyObject *pval
  285. Py_ssize_t pos = 0
  286. rv = factory()
  287. f = get_map_iter(d, &obj)
  288. d = <object>obj
  289. Py_DECREF(d)
  290. while f(d, &pos, &pkey, &pval):
  291. k = <object>pkey
  292. v = <object>pval
  293. if predicate((k, v)):
  294. rv[k] = v
  295. return rv
  296. cpdef object assoc(object d, object key, object value, object factory=dict):
  297. """
  298. Return a new dict with new key value pair
  299. New dict has d[key] set to value. Does not modify the initial dictionary.
  300. >>> assoc({'x': 1}, 'x', 2)
  301. {'x': 2}
  302. >>> assoc({'x': 1}, 'y', 3) # doctest: +SKIP
  303. {'x': 1, 'y': 3}
  304. """
  305. cdef object rv
  306. rv = factory()
  307. if PyDict_CheckExact(rv):
  308. PyDict_Update(rv, d)
  309. else:
  310. rv.update(d)
  311. rv[key] = value
  312. return rv
  313. cpdef object assoc_in(object d, object keys, object value, object factory=dict):
  314. """
  315. Return a new dict with new, potentially nested, key value pair
  316. >>> purchase = {'name': 'Alice',
  317. ... 'order': {'items': ['Apple', 'Orange'],
  318. ... 'costs': [0.50, 1.25]},
  319. ... 'credit card': '5555-1234-1234-1234'}
  320. >>> assoc_in(purchase, ['order', 'costs'], [0.25, 1.00]) # doctest: +SKIP
  321. {'credit card': '5555-1234-1234-1234',
  322. 'name': 'Alice',
  323. 'order': {'costs': [0.25, 1.00], 'items': ['Apple', 'Orange']}}
  324. """
  325. cdef object prevkey, key
  326. cdef object rv, inner, dtemp
  327. prevkey, keys = keys[0], keys[1:]
  328. rv = factory()
  329. if PyDict_CheckExact(rv):
  330. PyDict_Update(rv, d)
  331. else:
  332. rv.update(d)
  333. inner = rv
  334. for key in keys:
  335. if prevkey in d:
  336. d = d[prevkey]
  337. dtemp = factory()
  338. if PyDict_CheckExact(dtemp):
  339. PyDict_Update(dtemp, d)
  340. else:
  341. dtemp.update(d)
  342. else:
  343. d = factory()
  344. dtemp = d
  345. inner[prevkey] = dtemp
  346. prevkey = key
  347. inner = dtemp
  348. inner[prevkey] = value
  349. return rv
  350. cdef object c_dissoc(object d, object keys, object factory=dict):
  351. # implementation copied from toolz. Not benchmarked.
  352. cdef object rv
  353. rv = factory()
  354. if len(keys) < len(d) * 0.6:
  355. rv.update(d)
  356. for key in keys:
  357. if key in rv:
  358. del rv[key]
  359. else:
  360. remaining = set(d)
  361. remaining.difference_update(keys)
  362. for k in remaining:
  363. rv[k] = d[k]
  364. return rv
  365. def dissoc(d, *keys, **kwargs):
  366. """
  367. Return a new dict with the given key(s) removed.
  368. New dict has d[key] deleted for each supplied key.
  369. Does not modify the initial dictionary.
  370. >>> dissoc({'x': 1, 'y': 2}, 'y')
  371. {'x': 1}
  372. >>> dissoc({'x': 1, 'y': 2}, 'y', 'x')
  373. {}
  374. >>> dissoc({'x': 1}, 'y') # Ignores missing keys
  375. {'x': 1}
  376. """
  377. return c_dissoc(d, keys, get_factory('dissoc', kwargs))
  378. cpdef object update_in(object d, object keys, object func, object default=None, object factory=dict):
  379. """
  380. Update value in a (potentially) nested dictionary
  381. inputs:
  382. d - dictionary on which to operate
  383. keys - list or tuple giving the location of the value to be changed in d
  384. func - function to operate on that value
  385. If keys == [k0,..,kX] and d[k0]..[kX] == v, update_in returns a copy of the
  386. original dictionary with v replaced by func(v), but does not mutate the
  387. original dictionary.
  388. If k0 is not a key in d, update_in creates nested dictionaries to the depth
  389. specified by the keys, with the innermost value set to func(default).
  390. >>> inc = lambda x: x + 1
  391. >>> update_in({'a': 0}, ['a'], inc)
  392. {'a': 1}
  393. >>> transaction = {'name': 'Alice',
  394. ... 'purchase': {'items': ['Apple', 'Orange'],
  395. ... 'costs': [0.50, 1.25]},
  396. ... 'credit card': '5555-1234-1234-1234'}
  397. >>> update_in(transaction, ['purchase', 'costs'], sum) # doctest: +SKIP
  398. {'credit card': '5555-1234-1234-1234',
  399. 'name': 'Alice',
  400. 'purchase': {'costs': 1.75, 'items': ['Apple', 'Orange']}}
  401. >>> # updating a value when k0 is not in d
  402. >>> update_in({}, [1, 2, 3], str, default="bar")
  403. {1: {2: {3: 'bar'}}}
  404. >>> update_in({1: 'foo'}, [2, 3, 4], inc, 0)
  405. {1: 'foo', 2: {3: {4: 1}}}
  406. """
  407. cdef object prevkey, key
  408. cdef object rv, inner, dtemp
  409. prevkey, keys = keys[0], keys[1:]
  410. rv = factory()
  411. if PyDict_CheckExact(rv):
  412. PyDict_Update(rv, d)
  413. else:
  414. rv.update(d)
  415. inner = rv
  416. for key in keys:
  417. if prevkey in d:
  418. d = d[prevkey]
  419. dtemp = factory()
  420. if PyDict_CheckExact(dtemp):
  421. PyDict_Update(dtemp, d)
  422. else:
  423. dtemp.update(d)
  424. else:
  425. d = factory()
  426. dtemp = d
  427. inner[prevkey] = dtemp
  428. prevkey = key
  429. inner = dtemp
  430. if prevkey in d:
  431. key = func(d[prevkey])
  432. else:
  433. key = func(default)
  434. inner[prevkey] = key
  435. return rv
  436. cdef tuple _get_in_exceptions = (KeyError, IndexError, TypeError)
  437. cpdef object get_in(object keys, object coll, object default=None, object no_default=False):
  438. """
  439. Returns coll[i0][i1]...[iX] where [i0, i1, ..., iX]==keys.
  440. If coll[i0][i1]...[iX] cannot be found, returns ``default``, unless
  441. ``no_default`` is specified, then it raises KeyError or IndexError.
  442. ``get_in`` is a generalization of ``operator.getitem`` for nested data
  443. structures such as dictionaries and lists.
  444. >>> transaction = {'name': 'Alice',
  445. ... 'purchase': {'items': ['Apple', 'Orange'],
  446. ... 'costs': [0.50, 1.25]},
  447. ... 'credit card': '5555-1234-1234-1234'}
  448. >>> get_in(['purchase', 'items', 0], transaction)
  449. 'Apple'
  450. >>> get_in(['name'], transaction)
  451. 'Alice'
  452. >>> get_in(['purchase', 'total'], transaction)
  453. >>> get_in(['purchase', 'items', 'apple'], transaction)
  454. >>> get_in(['purchase', 'items', 10], transaction)
  455. >>> get_in(['purchase', 'total'], transaction, 0)
  456. 0
  457. >>> get_in(['y'], {}, no_default=True)
  458. Traceback (most recent call last):
  459. ...
  460. KeyError: 'y'
  461. See Also:
  462. itertoolz.get
  463. operator.getitem
  464. """
  465. cdef object item
  466. try:
  467. for item in keys:
  468. coll = coll[item]
  469. return coll
  470. except _get_in_exceptions:
  471. if no_default:
  472. raise
  473. return default