feat: improve / clean up logging (#7591)

* refactor: always use console log handler

* refactor: json for k8s, plain otherwise

* chore: remove syslog from wsgi.py

* chore: remove debug.log()

* chore: drop syslog from settings.py

* refactor: use log.log() in person.utils

* refactor: fetch_meeting_attendance->log.log()

* chore: gunicorn logs as JSON (wip)

* feat: better json log formatting

* refactor: improve log config

* feat: gunicorn access log fields

* fix: remove type hints

The gunicorn logger plays tricks with the
LogRecord args parameter to let it have string
keys instead of being a simple tuple.
The mypy tests rightly flag this. Rather
than fighting the typing, just remove the
hints and leave a comment warning not to
use the gunicorn-specific formatter with
other loggers.
This commit is contained in:
Jennifer Richards 2024-06-26 16:53:05 -03:00 committed by GitHub
parent 704f9967fd
commit 6a96a69234
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 95 additions and 52 deletions

View file

@ -3,15 +3,7 @@ import os
import sys
import time as timeutils
import inspect
from typing import Callable
try:
import syslog
logger = syslog.syslog # type: Callable
except ImportError: # import syslog will fail on Windows boxes
import logging
logging.basicConfig(filename='tracker.log',level=logging.INFO)
logger = logging.info
try:
from pprint import pformat
@ -155,13 +147,6 @@ def showpos(name):
indent = ' ' * (_report_indent[0])
sys.stderr.write("%s%s:%s: %s: '%s'\n" % (indent, fn, line, name, value))
def log(name):
if debug:
frame = inspect.stack()[1][0]
value = eval(name, frame.f_globals, frame.f_locals)
indent = ' ' * (_report_indent[0])
logger("%s%s: %s" % (indent, name, value))
def pprint(name):
if debug:
frame = inspect.stack()[1][0]

View file

@ -18,6 +18,7 @@ COPY . .
COPY ./dev/build/start.sh ./start.sh
COPY ./dev/build/datatracker-start.sh ./datatracker-start.sh
COPY ./dev/build/celery-start.sh ./celery-start.sh
COPY ./dev/build/gunicorn.conf.py ./gunicorn.conf.py
RUN pip3 --disable-pip-version-check --no-cache-dir install -r requirements.txt && \
echo '# empty' > ietf/settings_local.py && \

View file

@ -0,0 +1,49 @@
# Copyright The IETF Trust 2024, All Rights Reserved
# Log as JSON on stdout (to distinguish from Django's logs on stderr)
#
# This is applied as an update to gunicorn's glogging.CONFIG_DEFAULTS.
logconfig_dict = {
"version": 1,
"disable_existing_loggers": False,
"root": {"level": "INFO", "handlers": ["console"]},
"loggers": {
"gunicorn.error": {
"level": "INFO",
"handlers": ["console"],
"propagate": False,
"qualname": "gunicorn.error"
},
"gunicorn.access": {
"level": "INFO",
"handlers": ["access_console"],
"propagate": False,
"qualname": "gunicorn.access"
}
},
"handlers": {
"console": {
"class": "logging.StreamHandler",
"formatter": "json",
"stream": "ext://sys.stdout"
},
"access_console": {
"class": "logging.StreamHandler",
"formatter": "access_json",
"stream": "ext://sys.stdout"
},
},
"formatters": {
"json": {
"class": "ietf.utils.jsonlogger.DatatrackerJsonFormatter",
"style": "{",
"format": "{asctime}{levelname}{message}{name}{process}",
},
"access_json": {
"class": "ietf.utils.jsonlogger.GunicornRequestJsonFormatter",
"style": "{",
"format": "{asctime}{levelname}{message}{name}{process}",
}
}
}

View file

@ -3,10 +3,8 @@
import datetime
import os
import pprint
import sys
import syslog
from django.contrib import admin
from django.core.cache import cache
@ -17,14 +15,14 @@ from django.http import Http404
import debug # pyflakes:ignore
from ietf.person.models import Person, Alias, Email
from ietf.utils import log
from ietf.utils.mail import send_mail
def merge_persons(request, source, target, file=sys.stdout, verbose=False):
changes = []
# write log
syslog.openlog(str(os.path.basename(__file__)), syslog.LOG_PID, syslog.LOG_USER)
syslog.syslog("Merging person records {} => {}".format(source.pk,target.pk))
log.log(f"Merging person records {source.pk} => {target.pk}")
# handle primary emails
for email in get_extra_primary(source,target):
@ -118,7 +116,7 @@ def handle_users(source,target,check_only=False):
if source.user and target.user:
message = "DATATRACKER LOGIN ACTION: retaining login: {}, removing login: {}".format(target.user,source.user)
if not check_only:
syslog.syslog('merge-person-records: deactivating user {}'.format(source.user.username))
log.log(f"merge-person-records: deactivating user {source.user.username}")
user = source.user
source.user = None
source.save()

View file

@ -26,11 +26,6 @@ warnings.filterwarnings("ignore", message="The logout\\(\\) view is superseded b
warnings.filterwarnings("ignore", message="Report.file_reporters will no longer be available in Coverage.py 4.2", module="coverage.report")
warnings.filterwarnings("ignore", message="Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated", module="bleach")
warnings.filterwarnings("ignore", message="HTTPResponse.getheader\\(\\) is deprecated", module='selenium.webdriver')
try:
import syslog
syslog.openlog(str("datatracker"), syslog.LOG_PID, syslog.LOG_USER)
except ImportError:
pass
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.abspath(BASE_DIR + "/.."))
@ -240,11 +235,11 @@ LOGGING = {
#
'loggers': {
'django': {
'handlers': ['debug_console', 'mail_admins'],
'handlers': ['console', 'mail_admins'],
'level': 'INFO',
},
'django.request': {
'handlers': ['debug_console'],
'handlers': ['console'],
'level': 'ERROR',
},
'django.server': {
@ -252,19 +247,19 @@ LOGGING = {
'level': 'INFO',
},
'django.security': {
'handlers': ['debug_console', ],
'handlers': ['console', ],
'level': 'INFO',
},
'oidc_provider': {
'handlers': ['debug_console', ],
'handlers': ['console', ],
'level': 'DEBUG',
},
'datatracker': {
'handlers': ['debug_console'],
'handlers': ['console'],
'level': 'INFO',
},
'celery': {
'handlers': ['debug_console'],
'handlers': ['console'],
'level': 'INFO',
},
},
@ -275,7 +270,7 @@ LOGGING = {
'console': {
'level': 'DEBUG',
'class': 'logging.StreamHandler',
'formatter': 'json',
'formatter': 'plain',
},
'debug_console': {
# Active only when DEBUG=True
@ -331,7 +326,9 @@ LOGGING = {
'format': '{levelname}: {name}:{lineno}: {message}',
},
'json' : {
'()': 'pythonjsonlogger.jsonlogger.JsonFormatter'
"class": "ietf.utils.jsonlogger.DatatrackerJsonFormatter",
"style": "{",
"format": "{asctime}{levelname}{message}{name}{pathname}{lineno}{funcName}{process}",
}
},
}

View file

@ -1,8 +1,6 @@
# Copyright The IETF Trust 2017-2019, All Rights Reserved
# Copyright 2016 IETF Trust
import syslog
from django.core.management.base import BaseCommand, CommandError
from django.utils import timezone
@ -10,10 +8,8 @@ import debug # pyflakes:ignore
from ietf.meeting.models import Meeting
from ietf.stats.utils import fetch_attendance_from_meetings
from ietf.utils import log
logtag = __name__.split('.')[-1]
logname = "user.log"
syslog.openlog(str(logtag), syslog.LOG_PID, syslog.LOG_USER)
class Command(BaseCommand):
help = "Fetch meeting attendee figures from ietf.org/registration/attendees."
@ -43,4 +39,4 @@ class Command(BaseCommand):
if self.stdout.isatty():
self.stdout.write(msg+'\n') # make debugging a bit easier
else:
syslog.syslog(msg)
log.log(msg)

26
ietf/utils/jsonlogger.py Normal file
View file

@ -0,0 +1,26 @@
# Copyright The IETF Trust 2024, All Rights Reserved
from pythonjsonlogger import jsonlogger
import time
class DatatrackerJsonFormatter(jsonlogger.JsonFormatter):
converter = time.gmtime # use UTC
default_msec_format = "%s.%03d" # '.' instead of ','
class GunicornRequestJsonFormatter(DatatrackerJsonFormatter):
"""Only works with Gunicorn's logging"""
def add_fields(self, log_record, record, message_dict):
super().add_fields(log_record, record, message_dict)
log_record.setdefault("method", record.args["m"])
log_record.setdefault("proto", record.args["H"])
log_record.setdefault("remote_ip", record.args["h"])
path = record.args["U"] # URL path
if record.args["q"]: # URL query string
path = "?".join([path, record.args["q"]])
log_record.setdefault("path", path)
log_record.setdefault("status", record.args["s"])
log_record.setdefault("referer", record.args["f"])
log_record.setdefault("user_agent", record.args["a"])
log_record.setdefault("len_bytes", record.args["B"])
log_record.setdefault("duration_ms", record.args["M"])

View file

@ -1,23 +1,17 @@
# Copyright The IETF Trust 2013-2021, All Rights Reserved
# Copyright The IETF Trust 2013-2024, All Rights Reserved
# -*- coding: utf-8 -*-
import os
import sys
import syslog
path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
syslog.openlog(str("datatracker"), syslog.LOG_PID, syslog.LOG_USER)
if not path in sys.path:
sys.path.insert(0, path)
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "ietf.settings")
syslog.syslog("Starting datatracker wsgi instance")
from django.core.wsgi import get_wsgi_application
application = get_wsgi_application()
application = get_wsgi_application()

View file

@ -261,8 +261,5 @@ _csrf_trusted_origins_str = os.environ.get("DATATRACKER_CSRF_TRUSTED_ORIGINS")
if _csrf_trusted_origins_str is not None:
CSRF_TRUSTED_ORIGINS = _multiline_to_list(_csrf_trusted_origins_str)
# Send logs to console instead of debug_console when running in kubernetes
LOGGING["loggers"]["django"]["handlers"] = ["console", "mail_admins"]
LOGGING["loggers"]["django.security"]["handlers"] = ["console"]
LOGGING["loggers"]["datatracker"]["handlers"] = ["console"]
LOGGING["loggers"]["celery"]["handlers"] = ["console"]
# Console logs as JSON instead of plain when running in k8s
LOGGING["handlers"]["console"]["formatter"] = "json"