Avoid retrying open_connection on unrecoverable errors (#340)

* Avoid retrying open_connection on unrecoverable errors

- We can retry so hard that we block the event loop

Fixes
```
2022-04-16 22:18:51 WARNING (MainThread) [asyncio] Executing <Task finished name=Task-3576 coro=<open_connection() done, defined at /opt/homebrew/Cellar/python@3.9/3.9.12/Frameworks/Python.framework/Versions/3.9/lib/python3.9/asyncio/streams.py:25> exception=ConnectionRefusedError(61, "Connect call failed (192.168.107.200, 9999)") created at /opt/homebrew/Cellar/python@3.9/3.9.12/Frameworks/Python.framework/Versions/3.9/lib/python3.9/asyncio/tasks.py:460> took 1.001 seconds
```

* comment
This commit is contained in:
J. Nick Koston 2022-04-24 07:38:42 -10:00 committed by GitHub
parent d2581bf077
commit d908a5ab2a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 57 additions and 0 deletions

View File

@ -11,6 +11,7 @@ http://www.apache.org/licenses/LICENSE-2.0
"""
import asyncio
import contextlib
import errno
import json
import logging
import struct
@ -20,6 +21,7 @@ from typing import Dict, Generator, Optional, Union
from .exceptions import SmartDeviceException
# Module-level logger.
_LOGGER = logging.getLogger(__name__)
# errno values treated as unrecoverable when opening a connection: an OSError
# carrying one of these is not retried and is reported as a
# SmartDeviceException right away.
_NO_RETRY_ERRORS = {errno.EHOSTDOWN, errno.EHOSTUNREACH, errno.ECONNREFUSED}
class TPLinkSmartHomeProtocol:
@ -115,9 +117,30 @@ class TPLinkSmartHomeProtocol:
async def _query(self, request: str, retry_count: int, timeout: int) -> Dict:
"""Try to query a device."""
#
# Most of the time we will already be connected if the device is online
# and the connect call will do nothing and return right away
#
# However, if we get an unrecoverable error (_NO_RETRY_ERRORS and ConnectionRefusedError)
# we do not want to keep trying since many connection open/close operations
# in the same time frame can block the event loop. This is especially
# important when there are multiple tplink devices being polled.
#
for retry in range(retry_count + 1):
try:
await self._connect(timeout)
except ConnectionRefusedError as ex:
await self.close()
raise SmartDeviceException(
f"Unable to connect to the device: {self.host}: {ex}"
)
except OSError as ex:
await self.close()
if ex.errno in _NO_RETRY_ERRORS or retry >= retry_count:
raise SmartDeviceException(
f"Unable to connect to the device: {self.host}: {ex}"
)
continue
except Exception as ex:
await self.close()
if retry >= retry_count:

View File

@ -1,3 +1,4 @@
import errno
import json
import logging
import struct
@ -29,6 +30,39 @@ async def test_protocol_retries(mocker, retry_count):
assert conn.call_count == retry_count + 1
async def test_protocol_no_retry_on_unreachable(mocker):
    """An EHOSTUNREACH OSError from open_connection must not be retried.

    The query is issued with ``retry_count=5``; it is expected to raise
    SmartDeviceException after exactly one connection attempt.
    """
    unreachable = OSError(errno.EHOSTUNREACH, "No route to host")
    open_connection_mock = mocker.patch(
        "asyncio.open_connection", side_effect=unreachable
    )

    with pytest.raises(SmartDeviceException):
        protocol = TPLinkSmartHomeProtocol("127.0.0.1")
        await protocol.query({}, retry_count=5)

    # A single call means no retry happened.
    assert open_connection_mock.call_count == 1
async def test_protocol_no_retry_connection_refused(mocker):
    """A ConnectionRefusedError from open_connection must not be retried.

    The query is issued with ``retry_count=5``; it is expected to raise
    SmartDeviceException after exactly one connection attempt.
    """
    open_connection_mock = mocker.patch(
        "asyncio.open_connection", side_effect=ConnectionRefusedError
    )

    with pytest.raises(SmartDeviceException):
        protocol = TPLinkSmartHomeProtocol("127.0.0.1")
        await protocol.query({}, retry_count=5)

    # A single call means no retry happened.
    assert open_connection_mock.call_count == 1
async def test_protocol_retry_recoverable_error(mocker):
    """An ECONNRESET OSError from open_connection is retried until exhausted.

    With ``retry_count=5`` the query is expected to raise
    SmartDeviceException after six connection attempts in total
    (the first try plus five retries).
    """
    connection_reset = OSError(errno.ECONNRESET, "Connection reset by peer")
    open_connection_mock = mocker.patch(
        "asyncio.open_connection", side_effect=connection_reset
    )

    with pytest.raises(SmartDeviceException):
        protocol = TPLinkSmartHomeProtocol("127.0.0.1")
        await protocol.query({}, retry_count=5)

    # Initial attempt + 5 retries.
    assert open_connection_mock.call_count == 6
@pytest.mark.skipif(sys.version_info < (3, 8), reason="3.8 is first one with asyncmock")
@pytest.mark.parametrize("retry_count", [1, 3, 5])
async def test_protocol_reconnect(mocker, retry_count):