From 6f3fb6930f0c71999183dd8c92cdf6d575e54cd9 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 13 Jan 2022 15:06:28 +0000 Subject: [PATCH] Add timeouts to requests library calls. Fixes #3498. Commit ready for merge. - Legacy-Id: 19839 --- ietf/bin/iana-protocols-updates | 12 ++++++++++-- ietf/bin/rfc-editor-index-updates | 25 ++++++++++++++++++++----- ietf/bin/rfc-editor-queue-updates | 13 +++++++++---- ietf/doc/views_review.py | 9 +++++++-- ietf/meeting/utils.py | 6 ++++-- ietf/settings.py | 4 ++++ ietf/stats/utils.py | 10 +++++++++- ietf/sync/iana.py | 9 +++++++-- ietf/sync/rfceditor.py | 7 ++++++- ietf/utils/hedgedoc.py | 21 +++++++++++++++------ 10 files changed, 91 insertions(+), 25 deletions(-) diff --git a/ietf/bin/iana-protocols-updates b/ietf/bin/iana-protocols-updates index c3a5f28de..668ee54f9 100755 --- a/ietf/bin/iana-protocols-updates +++ b/ietf/bin/iana-protocols-updates @@ -31,8 +31,16 @@ syslog.syslog("Updating history log with new RFC entries from IANA protocols pag # FIXME: this needs to be the date where this tool is first deployed rfc_must_published_later_than = datetime.datetime(2012, 11, 26, 0, 0, 0) -text = requests.get(settings.IANA_SYNC_PROTOCOLS_URL).text -rfc_numbers = parse_protocol_page(text) +try: + response = requests.get( + settings.IANA_SYNC_PROTOCOLS_URL, + timeout=30, + ) +except requests.Timeout as exc: + syslog.syslog(f'GET request timed out retrieving IANA protocols page: {exc}') + sys.exit(1) + +rfc_numbers = parse_protocol_page(response.text) for chunk in chunks(rfc_numbers, 100): updated = update_rfc_log_from_protocol_page(chunk, rfc_must_published_later_than) diff --git a/ietf/bin/rfc-editor-index-updates b/ietf/bin/rfc-editor-index-updates index bf1b0ac54..4ff3bf373 100755 --- a/ietf/bin/rfc-editor-index-updates +++ b/ietf/bin/rfc-editor-index-updates @@ -5,10 +5,8 @@ import datetime import io -import json import os import requests -import socket import sys import syslog import traceback @@ -48,11 +46,28 @@ if options.skip_date: log("Updating document metadata from RFC index going back to %s, from %s" % (skip_date, settings.RFC_EDITOR_INDEX_URL)) -socket.setdefaulttimeout(30) -rfc_index_xml = requests.get(settings.RFC_EDITOR_INDEX_URL).text +try: + response = requests.get( + settings.RFC_EDITOR_INDEX_URL, + timeout=30, # seconds + ) +except requests.Timeout as exc: + log(f'GET request timed out retrieving RFC editor index: {exc}') + sys.exit(1) + + +rfc_index_xml = response.text index_data = ietf.sync.rfceditor.parse_index(io.StringIO(rfc_index_xml)) -errata_data = requests.get(settings.RFC_EDITOR_ERRATA_JSON_URL).json() +try: + response = requests.get( + settings.RFC_EDITOR_ERRATA_JSON_URL, + timeout=30, # seconds + ) +except requests.Timeout as exc: + log(f'GET request timed out retrieving RFC editor errata: {exc}') + sys.exit(1) +errata_data = response.json() if len(index_data) < ietf.sync.rfceditor.MIN_INDEX_RESULTS: log("Not enough index entries, only %s" % len(index_data)) diff --git a/ietf/bin/rfc-editor-queue-updates b/ietf/bin/rfc-editor-queue-updates index 08f3603c6..b441e50eb 100755 --- a/ietf/bin/rfc-editor-queue-updates +++ b/ietf/bin/rfc-editor-queue-updates @@ -3,7 +3,6 @@ import io import os import requests -import socket import sys # boilerplate @@ -21,9 +20,15 @@ from ietf.utils.log import log log("Updating RFC Editor queue states from %s" % settings.RFC_EDITOR_QUEUE_URL) -socket.setdefaulttimeout(30) -response = requests.get(settings.RFC_EDITOR_QUEUE_URL).text -drafts, warnings = parse_queue(io.StringIO(response)) +try: + response = requests.get( + settings.RFC_EDITOR_QUEUE_URL, + timeout=30, # seconds + ) +except requests.Timeout as exc: + log(f'GET request timed out retrieving RFC editor queue: {exc}') + sys.exit(1) +drafts, warnings = parse_queue(io.StringIO(response.text)) for w in warnings: log(u"Warning: %s" % w) diff --git a/ietf/doc/views_review.py b/ietf/doc/views_review.py index 6bf431b75..b72323283 100644 --- a/ietf/doc/views_review.py +++ b/ietf/doc/views_review.py @@ -44,6 +44,7 @@ from ietf.review.utils import (active_review_teams, assign_review_request_to_rev close_review_request_states, close_review_request) from ietf.review import mailarch +from ietf.utils import log from ietf.utils.fields import DatepickerDateField from ietf.utils.text import strip_prefix, xslugify from ietf.utils.textupload import get_cleaned_text_file_content @@ -621,9 +622,13 @@ class CompleteReviewForm(forms.Form): url = self.cleaned_data['review_url'] #scheme, netloc, path, parameters, query, fragment = urlparse(url) if url: - r = requests.get(url) + try: + r = requests.get(url, timeout=settings.DEFAULT_REQUESTS_TIMEOUT) + except requests.Timeout as exc: + log.log(f'GET request timed out for [{url}]: {exc}') + raise forms.ValidationError("Trying to retrieve the URL resulted in a request timeout. Please provide a URL that can be retrieved.") from exc if r.status_code != 200: - raise forms.ValidationError("Trying to retrieve the URL resulted in status code %s: %s. Please provide an URL that can be retrieved." % (r.status_code, r.reason)) + raise forms.ValidationError("Trying to retrieve the URL resulted in status code %s: %s. Please provide a URL that can be retrieved." % (r.status_code, r.reason)) return url def clean(self): diff --git a/ietf/meeting/utils.py b/ietf/meeting/utils.py index 6b6174191..babbf1897 100644 --- a/ietf/meeting/utils.py +++ b/ietf/meeting/utils.py @@ -28,6 +28,7 @@ from ietf.name.models import SessionStatusName, ConstraintName from ietf.person.models import Person from ietf.secr.proceedings.proc_utils import import_audio_files from ietf.utils.html import sanitize_document +from ietf.utils.log import log def session_time_for_sorting(session, use_meeting_date): @@ -123,9 +124,10 @@ def create_proceedings_templates(meeting): # Get meeting attendees from registration system url = settings.STATS_REGISTRATION_ATTENDEES_JSON_URL.format(number=meeting.number) try: - attendees = requests.get(url).json() - except (ValueError, HTTPError): + attendees = requests.get(url, timeout=settings.DEFAULT_REQUESTS_TIMEOUT).json() + except (ValueError, HTTPError, requests.Timeout) as exc: attendees = [] + log(f'Failed to retrieve meeting attendees from [{url}]: {exc}') if attendees: attendees = sorted(attendees, key = lambda a: a['LastName']) diff --git a/ietf/settings.py b/ietf/settings.py index 17b5dc24f..219545398 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -1232,6 +1232,10 @@ qvNU+qRWi+YXrITsgn92/gVxX5AoK0n+s5Lx7fpjxkARVi66SF6zTJnX -----END PRIVATE KEY----- """ + +# Default timeout for HTTP requests via the requests library +DEFAULT_REQUESTS_TIMEOUT = 20 # seconds + # Put the production SECRET_KEY in settings_local.py, and also any other # sensitive or site-specific changes. DO NOT commit settings_local.py to svn. from ietf.settings_local import * # pyflakes:ignore pylint: disable=wildcard-import diff --git a/ietf/stats/utils.py b/ietf/stats/utils.py index 87b827ab5..75c7e56cc 100644 --- a/ietf/stats/utils.py +++ b/ietf/stats/utils.py @@ -15,6 +15,7 @@ from ietf.stats.models import AffiliationAlias, AffiliationIgnoredEnding, Countr from ietf.name.models import CountryName from ietf.person.models import Person, Email, Alias from ietf.person.name import unidecode_name +from ietf.utils.log import log def compile_affiliation_ending_stripping_regexp(): @@ -230,7 +231,14 @@ def get_meeting_registration_data(meeting): """ num_created = 0 num_processed = 0 - response = requests.get(settings.STATS_REGISTRATION_ATTENDEES_JSON_URL.format(number=meeting.number)) + try: + response = requests.get( + settings.STATS_REGISTRATION_ATTENDEES_JSON_URL.format(number=meeting.number), + timeout=settings.DEFAULT_REQUESTS_TIMEOUT, + ) + except requests.Timeout as exc: + log(f'GET request timed out for [{settings.STATS_REGISTRATION_ATTENDEES_JSON_URL}]: {exc}') + raise RuntimeError("Timeout retrieving data from registrations API") from exc if response.status_code == 200: decoded = [] try: diff --git a/ietf/sync/iana.py b/ietf/sync/iana.py index 93f958a99..b98699156 100644 --- a/ietf/sync/iana.py +++ b/ietf/sync/iana.py @@ -19,6 +19,7 @@ from ietf.doc.mails import email_state_changed from ietf.doc.models import Document, DocEvent, State, StateDocEvent, StateType from ietf.doc.utils import add_state_change_event from ietf.person.models import Person +from ietf.utils.log import log from ietf.utils.mail import parseaddr, get_payload_text from ietf.utils.timezone import local_timezone_to_utc, email_time_to_local_timezone, utc_to_local_timezone @@ -69,8 +70,12 @@ def fetch_changes_json(url, start, end): username = "ietfsync" password = settings.IANA_SYNC_PASSWORD headers = { "Authorization": "Basic %s" % force_str(base64.encodebytes(smart_bytes("%s:%s" % (username, password)))).replace("\n", "") } - text = requests.get(url, headers=headers).text - return text + try: + response = requests.get(url, headers=headers, timeout=settings.DEFAULT_REQUESTS_TIMEOUT) + except requests.Timeout as exc: + log(f'GET request failed for [{url}]: {exc}') + raise RuntimeError(f'Timeout retrieving [{url}]') from exc + return response.text def parse_changes_json(text): response = json.loads(text) diff --git a/ietf/sync/rfceditor.py b/ietf/sync/rfceditor.py index f41554c03..c2264a0d3 100644 --- a/ietf/sync/rfceditor.py +++ b/ietf/sync/rfceditor.py @@ -558,7 +558,12 @@ def post_approved_draft(url, name): text = error = "" try: - r = requests.post(url, headers=headers, data=smart_bytes(urlencode({ 'draft': name })), timeout=20) + r = requests.post( + url, + headers=headers, + data=smart_bytes(urlencode({ 'draft': name })), + timeout=settings.DEFAULT_REQUESTS_TIMEOUT, + ) log("RFC-Editor notification result for draft '%s': %s:'%s'" % (name, r.status_code, r.text)) diff --git a/ietf/utils/hedgedoc.py b/ietf/utils/hedgedoc.py index f02d1ffd3..e6cb8101d 100644 --- a/ietf/utils/hedgedoc.py +++ b/ietf/utils/hedgedoc.py @@ -39,9 +39,13 @@ class Note: """ if self._source is None: try: - r = requests.get(urljoin(self.base_url, f'{self.id}/download'), allow_redirects=True) - except requests.RequestException: - raise ServerNoteError + r = requests.get( + urljoin(self.base_url, f'{self.id}/download'), + allow_redirects=True, + timeout=settings.DEFAULT_REQUESTS_TIMEOUT, + ) + except requests.RequestException as exc: + raise ServerNoteError from exc if r.status_code != 200: raise NoteNotFound self._source = self.preprocess_source(r.text) @@ -69,9 +73,13 @@ class Note: def _retrieve_metadata(self): if self._metadata is None: try: - r = requests.get(urljoin(self.base_url, f'{self.id}/info'), allow_redirects=True) - except requests.RequestException: - raise ServerNoteError + r = requests.get( + urljoin(self.base_url, f'{self.id}/info'), + allow_redirects=True, + timeout=settings.DEFAULT_REQUESTS_TIMEOUT, + ) + except requests.RequestException as exc: + raise ServerNoteError from exc if r.status_code != 200: raise NoteNotFound try: @@ -109,6 +117,7 @@ class NoteError(Exception): class ServerNoteError(NoteError): default_message = 'Could not reach the notes server' + class NoteNotFound(NoteError): default_message = 'Note did not exist or could not be loaded'