test_large_payloads.py 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. # -*- coding: utf-8 -*-
  2. import unittest
  3. import struct
  4. from unittest.mock import Mock, patch, MagicMock
  5. from websocket._abnf import ABNF
  6. from websocket._core import WebSocket
  7. from websocket._exceptions import WebSocketProtocolException, WebSocketPayloadException
  8. from websocket._ssl_compat import SSLError
  9. """
  10. test_large_payloads.py
  11. websocket - WebSocket client library for Python
  12. Copyright 2025 engn33r
  13. Licensed under the Apache License, Version 2.0 (the "License");
  14. you may not use this file except in compliance with the License.
  15. You may obtain a copy of the License at
  16. http://www.apache.org/licenses/LICENSE-2.0
  17. Unless required by applicable law or agreed to in writing, software
  18. distributed under the License is distributed on an "AS IS" BASIS,
  19. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  20. See the License for the specific language governing permissions and
  21. limitations under the License.
  22. """
  23. class LargePayloadTest(unittest.TestCase):
  24. def test_frame_length_encoding_boundaries(self):
  25. """Test WebSocket frame length encoding at various boundaries"""
  26. # Test length encoding boundaries as per RFC 6455
  27. test_cases = [
  28. (125, "Single byte length"), # Max for 7-bit length
  29. (126, "Two byte length start"), # Start of 16-bit length
  30. (127, "Two byte length"),
  31. (65535, "Two byte length max"), # Max for 16-bit length
  32. (65536, "Eight byte length start"), # Start of 64-bit length
  33. (16384, "16KB boundary"), # The problematic size
  34. (16385, "Just over 16KB"),
  35. (32768, "32KB"),
  36. (131072, "128KB"),
  37. ]
  38. for length, description in test_cases:
  39. with self.subTest(length=length, description=description):
  40. # Create payload of specified length
  41. payload = b"A" * length
  42. # Create frame
  43. frame = ABNF.create_frame(payload, ABNF.OPCODE_BINARY)
  44. # Verify frame can be formatted without error
  45. formatted = frame.format()
  46. # Verify the frame header is correctly structured
  47. self.assertIsInstance(formatted, bytes)
  48. self.assertTrue(len(formatted) >= length) # Header + payload
  49. # Verify payload length is preserved
  50. self.assertEqual(len(frame.data), length)
  51. def test_recv_large_payload_chunked(self):
  52. """Test receiving large payloads in chunks (simulating the 16KB recv issue)"""
  53. # Create a large payload that would trigger chunked reading
  54. large_payload = b"B" * 32768 # 32KB
  55. # Mock recv function that returns data in 16KB chunks
  56. chunks = []
  57. chunk_size = 16384
  58. for i in range(0, len(large_payload), chunk_size):
  59. chunks.append(large_payload[i : i + chunk_size])
  60. call_count = 0
  61. def mock_recv(bufsize):
  62. nonlocal call_count
  63. if call_count >= len(chunks):
  64. return b""
  65. result = chunks[call_count]
  66. call_count += 1
  67. return result
  68. # Test the frame buffer's recv_strict method
  69. from websocket._abnf import frame_buffer
  70. fb = frame_buffer(mock_recv, skip_utf8_validation=True)
  71. # This should handle large payloads by chunking
  72. result = fb.recv_strict(len(large_payload))
  73. self.assertEqual(result, large_payload)
  74. # Verify multiple recv calls were made
  75. self.assertGreater(call_count, 1)
  76. def test_ssl_large_payload_simulation(self):
  77. """Simulate SSL BAD_LENGTH error scenario"""
  78. # This test demonstrates that the 16KB limit in frame buffer protects against SSL issues
  79. payload_size = 16385
  80. recv_calls = []
  81. def mock_recv_with_ssl_limit(bufsize):
  82. recv_calls.append(bufsize)
  83. # This simulates the SSL issue: BAD_LENGTH when trying to recv > 16KB
  84. if bufsize > 16384:
  85. raise SSLError("[SSL: BAD_LENGTH] unknown error")
  86. return b"C" * min(bufsize, 16384)
  87. from websocket._abnf import frame_buffer
  88. fb = frame_buffer(mock_recv_with_ssl_limit, skip_utf8_validation=True)
  89. # The frame buffer handles this correctly by chunking recv calls
  90. result = fb.recv_strict(payload_size)
  91. # Verify it worked and chunked the calls properly
  92. self.assertEqual(len(result), payload_size)
  93. # Verify no single recv call was > 16KB
  94. self.assertTrue(all(call <= 16384 for call in recv_calls))
  95. # Verify multiple calls were made
  96. self.assertGreater(len(recv_calls), 1)
  97. def test_frame_format_large_payloads(self):
  98. """Test frame formatting with various large payload sizes"""
  99. # Test sizes around potential problem areas
  100. test_sizes = [16383, 16384, 16385, 32768, 65535, 65536]
  101. for size in test_sizes:
  102. with self.subTest(size=size):
  103. payload = b"D" * size
  104. frame = ABNF.create_frame(payload, ABNF.OPCODE_BINARY)
  105. # Should not raise any exceptions
  106. formatted = frame.format()
  107. # Verify structure
  108. self.assertIsInstance(formatted, bytes)
  109. self.assertEqual(len(frame.data), size)
  110. # Verify length encoding is correct based on size
  111. # Note: frames from create_frame() include masking by default (4 extra bytes)
  112. mask_size = 4 # WebSocket frames are masked by default
  113. if size < ABNF.LENGTH_7: # < 126
  114. # Length should be encoded in single byte
  115. expected_header_size = (
  116. 2 + mask_size
  117. ) # 1 byte opcode + 1 byte length + 4 byte mask
  118. elif size < ABNF.LENGTH_16: # < 65536
  119. # Length should be encoded in 2 bytes
  120. expected_header_size = (
  121. 4 + mask_size
  122. ) # 1 byte opcode + 1 byte marker + 2 bytes length + 4 byte mask
  123. else:
  124. # Length should be encoded in 8 bytes
  125. expected_header_size = (
  126. 10 + mask_size
  127. ) # 1 byte opcode + 1 byte marker + 8 bytes length + 4 byte mask
  128. self.assertEqual(len(formatted), expected_header_size + size)
  129. def test_send_large_payload_chunking(self):
  130. """Test that large payloads are sent in chunks to avoid SSL issues"""
  131. mock_sock = Mock()
  132. # Track how data is sent
  133. sent_chunks = []
  134. def mock_send(data):
  135. sent_chunks.append(len(data))
  136. return len(data)
  137. mock_sock.send = mock_send
  138. mock_sock.gettimeout.return_value = 30.0
  139. # Create WebSocket with mocked socket
  140. ws = WebSocket()
  141. ws.sock = mock_sock
  142. ws.connected = True
  143. # Create large payload
  144. large_payload = b"E" * 32768 # 32KB
  145. # Send the payload
  146. with patch("websocket._core.send") as mock_send_func:
  147. mock_send_func.side_effect = lambda sock, data: len(data)
  148. # This should work without SSL errors
  149. result = ws.send_binary(large_payload)
  150. # Verify payload was accepted
  151. self.assertGreater(result, 0)
  152. def test_utf8_validation_large_text(self):
  153. """Test UTF-8 validation with large text payloads"""
  154. # Create large valid UTF-8 text
  155. large_text = "Hello 世界! " * 2000 # About 26KB with Unicode
  156. # Test frame creation
  157. frame = ABNF.create_frame(large_text, ABNF.OPCODE_TEXT)
  158. # Should not raise validation errors
  159. formatted = frame.format()
  160. self.assertIsInstance(formatted, bytes)
  161. # Test with close frame that has invalid UTF-8 (this is what validate() actually checks)
  162. invalid_utf8_close_data = struct.pack("!H", 1000) + b"\xff\xfe invalid utf8"
  163. # Create close frame with invalid UTF-8 data
  164. frame = ABNF(1, 0, 0, 0, ABNF.OPCODE_CLOSE, 1, invalid_utf8_close_data)
  165. # Validation should catch the invalid UTF-8 in close frame reason
  166. with self.assertRaises(WebSocketProtocolException):
  167. frame.validate(skip_utf8_validation=False)
  168. def test_frame_buffer_edge_cases(self):
  169. """Test frame buffer with edge cases that could trigger bugs"""
  170. # Test scenario: exactly 16KB payload split across recv calls
  171. payload_16k = b"F" * 16384
  172. # Simulate receiving in smaller chunks
  173. chunks = [payload_16k[i : i + 4096] for i in range(0, len(payload_16k), 4096)]
  174. call_count = 0
  175. def mock_recv(bufsize):
  176. nonlocal call_count
  177. if call_count >= len(chunks):
  178. return b""
  179. result = chunks[call_count]
  180. call_count += 1
  181. return result
  182. from websocket._abnf import frame_buffer
  183. fb = frame_buffer(mock_recv, skip_utf8_validation=True)
  184. result = fb.recv_strict(16384)
  185. self.assertEqual(result, payload_16k)
  186. # Verify multiple recv calls were made
  187. self.assertEqual(call_count, 4) # 16KB / 4KB = 4 chunks
  188. def test_max_frame_size_limits(self):
  189. """Test behavior at WebSocket maximum frame size limits"""
  190. # Test just under the maximum theoretical frame size
  191. # (This is a very large test, so we'll use a smaller representative size)
  192. # Test with a reasonably large payload that represents the issue
  193. large_size = 1024 * 1024 # 1MB
  194. payload = b"G" * large_size
  195. # This should work without issues
  196. frame = ABNF.create_frame(payload, ABNF.OPCODE_BINARY)
  197. # Verify the frame can be formatted
  198. formatted = frame.format()
  199. self.assertIsInstance(formatted, bytes)
  200. # Verify payload is preserved
  201. self.assertEqual(len(frame.data), large_size)
  202. if __name__ == "__main__":
  203. unittest.main()