url_parser.pyx 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. #cython: language_level=3
  2. from __future__ import print_function
  3. from cpython.mem cimport PyMem_Malloc, PyMem_Free
  4. from cpython cimport PyObject_GetBuffer, PyBuffer_Release, PyBUF_SIMPLE, \
  5. Py_buffer
  6. from .errors import HttpParserInvalidURLError
  7. cimport cython
  8. from . cimport url_cparser as uparser
  9. __all__ = ('parse_url',)
  @cython.freelist(250)
  cdef class URL:
      """Container for the individual components of a URL.

      The seven components are stored exactly as passed to the constructor
      and are exposed as attributes that are read-only from Python code.
      ``port`` is an untyped object; every other component is declared as
      ``bytes``.
      """

      cdef readonly bytes schema
      cdef readonly bytes host
      cdef readonly object port
      cdef readonly bytes path
      cdef readonly bytes query
      cdef readonly bytes fragment
      cdef readonly bytes userinfo

      def __cinit__(
          self,
          bytes schema,
          bytes host,
          object port,
          bytes path,
          bytes query,
          bytes fragment,
          bytes userinfo
      ):
          # Plain attribute initialisation: values are stored as given,
          # without copying or validation.
          self.schema = schema
          self.host = host
          self.port = port
          self.path = path
          self.query = query
          self.fragment = fragment
          self.userinfo = userinfo

      def __repr__(self):
          # Debug representation that lists every component via its repr().
          template = (
              '<URL schema: {!r}, host: {!r}, port: {!r}, path: {!r}, '
              'query: {!r}, fragment: {!r}, userinfo: {!r}>'
          )
          components = (
              self.schema,
              self.host,
              self.port,
              self.path,
              self.query,
              self.fragment,
              self.userinfo,
          )
          return template.format(*components)
  cdef bytes _url_field(char* buf_data, uparser.http_parser_url* parsed,
                        int field):
      # Return the bytes of one URL component sliced out of ``buf_data``, or
      # None when the parser did not flag ``field`` in ``parsed.field_set``.
      cdef:
          int off
          int ln

      if not (parsed.field_set & (1 << field)):
          return None
      off = parsed.field_data[field].off
      ln = parsed.field_data[field].len
      return buf_data[off:off+ln]


  def parse_url(url):
      """Parse ``url`` and return its components as a ``URL`` object.

      Args:
          url: Object exposing the buffer protocol (for example ``bytes``)
              that holds the URL text.

      Returns:
          URL: The parsed components. A component that the parser did not
          report as present is ``None``; present components other than
          ``port`` are ``bytes`` copied out of the input buffer.

      Raises:
          HttpParserInvalidURLError: If the underlying parser returns a
              non-zero status for ``url``.
          MemoryError: If the parser result structure cannot be allocated.

      Any exception raised while acquiring the buffer of ``url`` (for
      example when ``url`` does not support the buffer protocol) propagates
      unchanged.
      """
      cdef:
          Py_buffer py_buf
          char* buf_data
          uparser.http_parser_url* parsed
          int res
          object port = None

      # Acquire the buffer before allocating anything: if this raises there
      # is nothing to clean up, so no memory can leak on bad input.
      PyObject_GetBuffer(url, &py_buf, PyBUF_SIMPLE)
      try:
          parsed = <uparser.http_parser_url*> \
              PyMem_Malloc(sizeof(uparser.http_parser_url))
          if parsed == NULL:
              # Never hand a NULL pointer to the C parser.
              raise MemoryError()
          try:
              uparser.http_parser_url_init(parsed)
              buf_data = <char*>py_buf.buf
              res = uparser.http_parser_parse_url(
                  buf_data, py_buf.len, 0, parsed)
              if res != 0:
                  raise HttpParserInvalidURLError(
                      "invalid url {!r}".format(url))

              # The port is read from the parser's own ``port`` member
              # rather than being sliced out of the buffer.
              if parsed.field_set & (1 << uparser.UF_PORT):
                  port = parsed.port

              return URL(
                  _url_field(buf_data, parsed, <int>uparser.UF_SCHEMA),
                  _url_field(buf_data, parsed, <int>uparser.UF_HOST),
                  port,
                  _url_field(buf_data, parsed, <int>uparser.UF_PATH),
                  _url_field(buf_data, parsed, <int>uparser.UF_QUERY),
                  _url_field(buf_data, parsed, <int>uparser.UF_FRAGMENT),
                  _url_field(buf_data, parsed, <int>uparser.UF_USERINFO),
              )
          finally:
              PyMem_Free(parsed)
      finally:
          PyBuffer_Release(&py_buf)