feat: improve / clean up logging (#7591)

* refactor: always use console log handler

* refactor: json for k8s, plain otherwise

* chore: remove syslog from wsgi.py

* chore: remove debug.log()

* chore: drop syslog from settings.py

* refactor: use log.log() in person.utils

* refactor: fetch_meeting_attendance->log.log()

* chore: gunicorn logs as JSON (wip)

* feat: better json log formatting

* refactor: improve log config

* feat: gunicorn access log fields

* fix: remove type hints

The gunicorn logger plays tricks with the LogRecord args parameter to let it have string keys instead of being a simple tuple. The mypy tests rightly flag this. Rather than fighting the typing, just remove the hints and leave a comment warning not to use the gunicorn-specific formatter with other loggers.
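
For context, a minimal sketch (not part of this commit) of what the message above describes: gunicorn logs each request roughly as logger.info(access_log_format, atoms), where atoms is a dict of per-request fields, and the logging module keeps that lone mapping as record.args, so an access-log formatter can index it by string key. The values below are invented for illustration.

    # Illustrative only: the shape of a gunicorn.access LogRecord, with made-up values.
    import logging

    record = logging.LogRecord(
        name="gunicorn.access", level=logging.INFO, pathname="", lineno=0,
        msg='%(h)s "%(m)s %(U)s" %(s)s',  # gunicorn-style access log format
        args={"h": "192.0.2.1", "m": "GET", "U": "/api/", "s": "200"},  # dict, not tuple
        exc_info=None,
    )
    print(record.args["m"])     # "GET" -- string-key lookup that mypy flags on tuple-typed args
    print(record.getMessage())  # 192.0.2.1 "GET /api/" 200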
Jennifer Richards 2024-06-26 16:53:05 -03:00 committed by GitHub
parent 704f9967fd
commit 6a96a69234
9 changed files with 95 additions and 52 deletions


@@ -3,15 +3,7 @@ import os
 import sys
 import time as timeutils
 import inspect
 
-from typing import Callable
-try:
-    import syslog
-    logger = syslog.syslog # type: Callable
-except ImportError: # import syslog will fail on Windows boxes
-    import logging
-    logging.basicConfig(filename='tracker.log',level=logging.INFO)
-    logger = logging.info
 
 try:
     from pprint import pformat
@@ -155,13 +147,6 @@ def showpos(name):
         indent = ' ' * (_report_indent[0])
         sys.stderr.write("%s%s:%s: %s: '%s'\n" % (indent, fn, line, name, value))
 
-def log(name):
-    if debug:
-        frame = inspect.stack()[1][0]
-        value = eval(name, frame.f_globals, frame.f_locals)
-        indent = ' ' * (_report_indent[0])
-        logger("%s%s: %s" % (indent, name, value))
-
 def pprint(name):
     if debug:
         frame = inspect.stack()[1][0]


@@ -18,6 +18,7 @@ COPY . .
 COPY ./dev/build/start.sh ./start.sh
 COPY ./dev/build/datatracker-start.sh ./datatracker-start.sh
 COPY ./dev/build/celery-start.sh ./celery-start.sh
+COPY ./dev/build/gunicorn.conf.py ./gunicorn.conf.py
 
 RUN pip3 --disable-pip-version-check --no-cache-dir install -r requirements.txt && \
     echo '# empty' > ietf/settings_local.py && \


@@ -0,0 +1,49 @@
+# Copyright The IETF Trust 2024, All Rights Reserved
+
+# Log as JSON on stdout (to distinguish from Django's logs on stderr)
+#
+# This is applied as an update to gunicorn's glogging.CONFIG_DEFAULTS.
+logconfig_dict = {
+    "version": 1,
+    "disable_existing_loggers": False,
+    "root": {"level": "INFO", "handlers": ["console"]},
+    "loggers": {
+        "gunicorn.error": {
+            "level": "INFO",
+            "handlers": ["console"],
+            "propagate": False,
+            "qualname": "gunicorn.error"
+        },
+        "gunicorn.access": {
+            "level": "INFO",
+            "handlers": ["access_console"],
+            "propagate": False,
+            "qualname": "gunicorn.access"
+        }
+    },
+    "handlers": {
+        "console": {
+            "class": "logging.StreamHandler",
+            "formatter": "json",
+            "stream": "ext://sys.stdout"
+        },
+        "access_console": {
+            "class": "logging.StreamHandler",
+            "formatter": "access_json",
+            "stream": "ext://sys.stdout"
+        },
+    },
+    "formatters": {
+        "json": {
+            "class": "ietf.utils.jsonlogger.DatatrackerJsonFormatter",
+            "style": "{",
+            "format": "{asctime}{levelname}{message}{name}{process}",
+        },
+        "access_json": {
+            "class": "ietf.utils.jsonlogger.GunicornRequestJsonFormatter",
+            "style": "{",
+            "format": "{asctime}{levelname}{message}{name}{process}",
+        }
+    }
+}
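
How this file gets applied (a sketch, assuming gunicorn is started from the directory the Dockerfile copies it into): gunicorn reads ./gunicorn.conf.py from its working directory, or a file given with -c, and applies logconfig_dict by updating glogging.CONFIG_DEFAULTS and passing the result to logging.config.dictConfig, roughly:

    # Rough equivalent of what gunicorn.glogging.Logger.setup() does with
    # logconfig_dict; simplified for illustration, not the actual gunicorn code.
    import logging.config
    from gunicorn import glogging

    config = dict(glogging.CONFIG_DEFAULTS)  # gunicorn's baseline logging config
    config.update(logconfig_dict)            # the dict defined in gunicorn.conf.py above
    logging.config.dictConfig(config)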


@@ -3,10 +3,8 @@
 
 import datetime
-import os
 import pprint
 import sys
-import syslog
 
 from django.contrib import admin
 from django.core.cache import cache
@@ -17,14 +15,14 @@ from django.http import Http404
 
 import debug # pyflakes:ignore
 
 from ietf.person.models import Person, Alias, Email
+from ietf.utils import log
 from ietf.utils.mail import send_mail
 
 
 def merge_persons(request, source, target, file=sys.stdout, verbose=False):
     changes = []
 
     # write log
-    syslog.openlog(str(os.path.basename(__file__)), syslog.LOG_PID, syslog.LOG_USER)
-    syslog.syslog("Merging person records {} => {}".format(source.pk,target.pk))
+    log.log(f"Merging person records {source.pk} => {target.pk}")
 
     # handle primary emails
     for email in get_extra_primary(source,target):
@@ -118,7 +116,7 @@ def handle_users(source,target,check_only=False):
     if source.user and target.user:
         message = "DATATRACKER LOGIN ACTION: retaining login: {}, removing login: {}".format(target.user,source.user)
         if not check_only:
-            syslog.syslog('merge-person-records: deactivating user {}'.format(source.user.username))
+            log.log(f"merge-person-records: deactivating user {source.user.username}")
             user = source.user
             source.user = None
             source.save()


@@ -26,11 +26,6 @@ warnings.filterwarnings("ignore", message="The logout\\(\\) view is superseded b
 warnings.filterwarnings("ignore", message="Report.file_reporters will no longer be available in Coverage.py 4.2", module="coverage.report")
 warnings.filterwarnings("ignore", message="Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated", module="bleach")
 warnings.filterwarnings("ignore", message="HTTPResponse.getheader\\(\\) is deprecated", module='selenium.webdriver')
-try:
-    import syslog
-    syslog.openlog(str("datatracker"), syslog.LOG_PID, syslog.LOG_USER)
-except ImportError:
-    pass
 
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(os.path.abspath(BASE_DIR + "/.."))
@@ -240,11 +235,11 @@ LOGGING = {
     #
     'loggers': {
         'django': {
-            'handlers': ['debug_console', 'mail_admins'],
+            'handlers': ['console', 'mail_admins'],
             'level': 'INFO',
         },
         'django.request': {
-            'handlers': ['debug_console'],
+            'handlers': ['console'],
             'level': 'ERROR',
         },
         'django.server': {
@@ -252,19 +247,19 @@ LOGGING = {
             'level': 'INFO',
         },
         'django.security': {
-            'handlers': ['debug_console', ],
+            'handlers': ['console', ],
             'level': 'INFO',
         },
         'oidc_provider': {
-            'handlers': ['debug_console', ],
+            'handlers': ['console', ],
             'level': 'DEBUG',
         },
         'datatracker': {
-            'handlers': ['debug_console'],
+            'handlers': ['console'],
             'level': 'INFO',
         },
         'celery': {
-            'handlers': ['debug_console'],
+            'handlers': ['console'],
             'level': 'INFO',
         },
     },
@@ -275,7 +270,7 @@ LOGGING = {
         'console': {
            'level': 'DEBUG',
            'class': 'logging.StreamHandler',
-            'formatter': 'json',
+            'formatter': 'plain',
        },
        'debug_console': {
            # Active only when DEBUG=True
@@ -331,7 +326,9 @@ LOGGING = {
             'format': '{levelname}: {name}:{lineno}: {message}',
         },
         'json' : {
-            '()': 'pythonjsonlogger.jsonlogger.JsonFormatter'
+            "class": "ietf.utils.jsonlogger.DatatrackerJsonFormatter",
+            "style": "{",
+            "format": "{asctime}{levelname}{message}{name}{pathname}{lineno}{funcName}{process}",
         }
     },
 }


@@ -1,8 +1,6 @@
 # Copyright The IETF Trust 2017-2019, All Rights Reserved
 # Copyright 2016 IETF Trust
 
-import syslog
-
 from django.core.management.base import BaseCommand, CommandError
 from django.utils import timezone
@@ -10,10 +8,8 @@ import debug # pyflakes:ignore
 
 from ietf.meeting.models import Meeting
 from ietf.stats.utils import fetch_attendance_from_meetings
+from ietf.utils import log
 
-logtag = __name__.split('.')[-1]
-logname = "user.log"
-syslog.openlog(str(logtag), syslog.LOG_PID, syslog.LOG_USER)
 
 class Command(BaseCommand):
     help = "Fetch meeting attendee figures from ietf.org/registration/attendees."
@@ -43,4 +39,4 @@ class Command(BaseCommand):
         if self.stdout.isatty():
             self.stdout.write(msg+'\n') # make debugging a bit easier
         else:
-            syslog.syslog(msg)
+            log.log(msg)

ietf/utils/jsonlogger.py (new file, 26 lines)

@@ -0,0 +1,26 @@
+# Copyright The IETF Trust 2024, All Rights Reserved
+from pythonjsonlogger import jsonlogger
+import time
+
+
+class DatatrackerJsonFormatter(jsonlogger.JsonFormatter):
+    converter = time.gmtime # use UTC
+    default_msec_format = "%s.%03d" # '.' instead of ','
+
+
+class GunicornRequestJsonFormatter(DatatrackerJsonFormatter):
+    """Only works with Gunicorn's logging"""
+    def add_fields(self, log_record, record, message_dict):
+        super().add_fields(log_record, record, message_dict)
+        log_record.setdefault("method", record.args["m"])
+        log_record.setdefault("proto", record.args["H"])
+        log_record.setdefault("remote_ip", record.args["h"])
+        path = record.args["U"] # URL path
+        if record.args["q"]: # URL query string
+            path = "?".join([path, record.args["q"]])
+        log_record.setdefault("path", path)
+        log_record.setdefault("status", record.args["s"])
+        log_record.setdefault("referer", record.args["f"])
+        log_record.setdefault("user_agent", record.args["a"])
+        log_record.setdefault("len_bytes", record.args["B"])
+        log_record.setdefault("duration_ms", record.args["M"])


@@ -1,23 +1,17 @@
-# Copyright The IETF Trust 2013-2021, All Rights Reserved
+# Copyright The IETF Trust 2013-2024, All Rights Reserved
 # -*- coding: utf-8 -*-
 
 import os
 import sys
-import syslog
 
 path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
-syslog.openlog(str("datatracker"), syslog.LOG_PID, syslog.LOG_USER)
-
 if not path in sys.path:
     sys.path.insert(0, path)
 
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ietf.settings")
 
-syslog.syslog("Starting datatracker wsgi instance")
-
 from django.core.wsgi import get_wsgi_application
-application = get_wsgi_application()
 
+application = get_wsgi_application()


@@ -261,8 +261,5 @@ _csrf_trusted_origins_str = os.environ.get("DATATRACKER_CSRF_TRUSTED_ORIGINS")
 if _csrf_trusted_origins_str is not None:
     CSRF_TRUSTED_ORIGINS = _multiline_to_list(_csrf_trusted_origins_str)
 
-# Send logs to console instead of debug_console when running in kubernetes
-LOGGING["loggers"]["django"]["handlers"] = ["console", "mail_admins"]
-LOGGING["loggers"]["django.security"]["handlers"] = ["console"]
-LOGGING["loggers"]["datatracker"]["handlers"] = ["console"]
-LOGGING["loggers"]["celery"]["handlers"] = ["console"]
+# Console logs as JSON instead of plain when running in k8s
+LOGGING["handlers"]["console"]["formatter"] = "json"