Inbound email: download from S3 + convert HTML to plaintext (#5348)

# What this PR does

* Make `AmazonSESValidatedInboundWebhookView` able to download emails
from S3 by providing AWS credentials via env variables
* Convert HTML to plaintext when there's only `text/html` available

## Which issue(s) this PR closes

Related to https://github.com/grafana/oncall-private/issues/2905

## Checklist

- [x] Unit, integration, and e2e (if applicable) tests updated
- [x] Documentation added (or `pr:no public docs` PR label added if not
required)
- [x] Added the relevant release notes label (see labels prefixed w/
`release:`). These labels dictate how your PR will
    show up in the autogenerated release notes.
This commit is contained in:
Vadim Stepanov 2024-12-18 16:35:44 +00:00 committed by GitHub
parent 0694fe5572
commit c36761e345
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 245 additions and 31 deletions

View file

@ -7,6 +7,8 @@ from anymail.exceptions import AnymailAPIError, AnymailInvalidAddress, AnymailWe
from anymail.inbound import AnymailInboundMessage
from anymail.signals import AnymailInboundEvent
from anymail.webhooks import amazon_ses, mailgun, mailjet, mandrill, postal, postmark, sendgrid, sparkpost
from bs4 import BeautifulSoup
from django.conf import settings
from django.http import HttpResponse, HttpResponseNotAllowed
from django.utils import timezone
from rest_framework import status
@ -25,6 +27,15 @@ class AmazonSESValidatedInboundWebhookView(amazon_ses.AmazonSESInboundWebhookVie
# disable "Your Anymail webhooks are insecure and open to anyone on the web." warning
warn_if_no_basic_auth = False
def __init__(self):
    """Set up the parent Anymail view with AWS session parameters read from Django settings."""
    # Credentials and region come from the INBOUND_EMAIL_AWS_* settings; they are handed to the
    # parent view as `session_params` (per the PR description, so emails can be downloaded from S3).
    aws_session_params = {
        "aws_access_key_id": settings.INBOUND_EMAIL_AWS_ACCESS_KEY_ID,
        "aws_secret_access_key": settings.INBOUND_EMAIL_AWS_SECRET_ACCESS_KEY,
        "region_name": settings.INBOUND_EMAIL_AWS_REGION,
    }
    super().__init__(session_params=aws_session_params)
def validate_request(self, request):
"""Add SNS message validation to Amazon SES inbound webhook view, which is not implemented in Anymail."""
if not validate_amazon_sns_message(self._parse_sns_message(request)):
@ -74,11 +85,10 @@ class InboundEmailWebhookView(AlertChannelDefiningMixin, APIView):
if request.method.lower() == "head":
return HttpResponse(status=status.HTTP_200_OK)
integration_token = self.get_integration_token_from_request(request)
if integration_token is None:
if self.integration_token is None:
return HttpResponse(status=status.HTTP_400_BAD_REQUEST)
request.inbound_email_integration_token = integration_token # used in RequestTimeLoggingMiddleware
return super().dispatch(request, alert_channel_key=integration_token)
request.inbound_email_integration_token = self.integration_token # used in RequestTimeLoggingMiddleware
return super().dispatch(request, alert_channel_key=self.integration_token)
def post(self, request):
payload = self.get_alert_payload_from_email_message(self.message)
@ -94,7 +104,8 @@ class InboundEmailWebhookView(AlertChannelDefiningMixin, APIView):
)
return Response("OK", status=status.HTTP_200_OK)
def get_integration_token_from_request(self, request) -> Optional[str]:
@cached_property
def integration_token(self) -> Optional[str]:
if not self.message:
return None
# First try envelope_recipient field.
@ -151,7 +162,8 @@ class InboundEmailWebhookView(AlertChannelDefiningMixin, APIView):
logger.error("Failed to parse inbound email message")
return None
def check_inbound_email_settings_set(self):
@staticmethod
def check_inbound_email_settings_set():
"""
Guard method that checks whether the INBOUND_EMAIL settings are present.
Returns InternalServerError if not.
@ -167,16 +179,105 @@ class InboundEmailWebhookView(AlertChannelDefiningMixin, APIView):
logger.error("InboundEmailWebhookView: INBOUND_EMAIL_DOMAIN env variable must be set.")
return HttpResponse(status=status.HTTP_500_INTERNAL_SERVER_ERROR)
def get_alert_payload_from_email_message(self, email: AnymailInboundMessage) -> EmailAlertPayload:
subject = email.subject or ""
subject = subject.strip()
message = email.text or ""
message = message.strip()
sender = self.get_sender_from_email_message(email)
@classmethod
def get_alert_payload_from_email_message(cls, email: AnymailInboundMessage) -> EmailAlertPayload:
if email.text:
message = email.text.strip()
elif email.html:
message = cls.html_to_plaintext(email.html)
else:
message = ""
return {"subject": subject, "message": message, "sender": sender}
return {
"subject": email.subject.strip() if email.subject else "",
"message": message,
"sender": cls.get_sender_from_email_message(email),
}
def get_sender_from_email_message(self, email: AnymailInboundMessage) -> str:
@staticmethod
def html_to_plaintext(html: str) -> str:
    """
    Render an HTML document as plain text.

    Block-level elements and <br> start new lines, links are rendered as "text (href)",
    list items are prefixed with "* ", <hr> becomes a row of 32 dashes, and empty lines are dropped.
    This is a best-effort conversion: HTML to plaintext is non-trivial and some documents
    will not come out perfectly.
    """
    document = BeautifulSoup(html, "html.parser")

    # Elements that browsers typically render on their own line. There is no single official
    # HTML5 list, so this is the set of tags whose default rendering is display: block,
    # display: list-item, display: table or display: table-row according to the HTML standard:
    # https://html.spec.whatwg.org/multipage/rendering.html
    block_tag_names = (
        "address article aside blockquote body center dd details dialog dir div dl dt "
        "fieldset figcaption figure footer form h1 h2 h3 h4 h5 h6 header hgroup hr html "
        "legend li listing main menu nav ol p plaintext pre search section summary table tr "
        "ul xmp"
    ).split()

    # Put a newline before and after each block-level element. This pass runs first so the
    # "* " bullets and <hr> dashes added below land between those newlines.
    for block in document.find_all(block_tag_names):
        block.insert_before("\n")
        block.insert_after("\n")

    # <br> tags are also typically rendered as newlines
    for line_break in document.find_all("br"):
        line_break.replace_with("\n")

    # example: "<a href="https://example.com">example</a>" -> "example (https://example.com)"
    for link in document.find_all("a"):
        target = link.get("href")
        if target:
            link.append(f" ({target})")

    # bullet marker for list items
    for item in document.find_all("li"):
        item.insert_before("* ")

    # horizontal rules become a fixed-width row of dashes
    for separator in document.find_all("hr"):
        separator.replace_with("-" * 32)

    # trim every line and drop the ones that end up empty
    stripped_lines = (raw_line.strip() for raw_line in document.get_text().splitlines())
    return "\n".join(text for text in stripped_lines if text)
@staticmethod
def get_sender_from_email_message(email: AnymailInboundMessage) -> str:
try:
if isinstance(email.from_email, list):
sender = email.from_email[0].addr_spec

View file

@ -6,6 +6,7 @@ from base64 import b64encode
from textwrap import dedent
from unittest.mock import ANY, Mock, patch
import httpretty
import pytest
from anymail.inbound import AnymailInboundMessage
from cryptography import x509
@ -54,13 +55,14 @@ SUBJECT = "Test email"
MESSAGE = "This is a test email message body."
def _sns_inbound_email_payload_and_headers(sender_email, to_email, subject, message):
def _sns_inbound_email_setup(sender_email, to_email, subject, message, content_type="text/plain", s3=False):
content = (
f"From: Sender Name <{sender_email}>\n"
f"To: {to_email}\n"
f"Subject: {subject}\n"
"Date: Tue, 5 Nov 2024 16:05:39 +0000\n"
"Message-ID: <example-message-id@mail.example.com>\n\n"
"Message-ID: <example-message-id@mail.example.com>\n"
f"Content-Type: {content_type}\n\n"
f"{message}\r\n"
)
@ -130,7 +132,7 @@ def _sns_inbound_email_payload_and_headers(sender_email, to_email, subject, mess
{"name": "To", "value": to_email},
{
"name": "Content-Type",
"value": 'multipart/alternative; boundary="00000000000036b9f706262c9312"',
"value": f"{content_type}",
},
],
"commonHeaders": {
@ -152,12 +154,12 @@ def _sns_inbound_email_payload_and_headers(sender_email, to_email, subject, mess
"dkimVerdict": {"status": "PASS"},
"dmarcVerdict": {"status": "PASS"},
"action": {
"type": "SNS",
"type": "S3" if s3 else "SNS",
"topicArn": "arn:aws:sns:us-east-2:123456789012:test",
"encoding": "BASE64",
**({"bucketName": "test-s3-bucket", "objectKey": "test-object-key"} if s3 else {"encoding": "BASE64"}),
},
},
"content": b64encode(content.encode()).decode(),
**({} if s3 else {"content": b64encode(content.encode()).decode()}),
}
payload = {
@ -189,7 +191,7 @@ def _sns_inbound_email_payload_and_headers(sender_email, to_email, subject, mess
"X-Amz-Sns-Message-Type": "Notification",
"X-Amz-Sns-Message-Id": "example-message-id-1234",
}
return payload, headers
return payload, headers, content
def _mailgun_inbound_email_payload(sender_email, to_email, subject, message):
@ -444,7 +446,7 @@ def test_amazon_ses_pass(create_alert_mock, settings, make_organization, make_al
token="test-token",
)
sns_payload, sns_headers = _sns_inbound_email_payload_and_headers(
sns_payload, sns_headers, _ = _sns_inbound_email_setup(
sender_email=SENDER_EMAIL,
to_email=TO_EMAIL,
subject=SUBJECT,
@ -476,16 +478,17 @@ def test_amazon_ses_pass(create_alert_mock, settings, make_organization, make_al
)
@patch("requests.get", return_value=Mock(content=CERTIFICATE))
@patch.object(create_alert, "delay")
@httpretty.activate(verbose=True, allow_net_connect=True)
@pytest.mark.django_db
def test_amazon_ses_validated_pass(
mock_create_alert, mock_requests_get, settings, make_organization, make_alert_receive_channel
):
def test_amazon_ses_validated_s3_pass(mock_create_alert, settings, make_organization, make_alert_receive_channel):
settings.INBOUND_EMAIL_ESP = "amazon_ses_validated,mailgun"
settings.INBOUND_EMAIL_DOMAIN = "inbound.example.com"
settings.INBOUND_EMAIL_WEBHOOK_SECRET = "secret"
settings.INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN = AMAZON_SNS_TOPIC_ARN
settings.INBOUND_EMAIL_AWS_ACCESS_KEY_ID = "test-access-key-id"
settings.INBOUND_EMAIL_AWS_SECRET_ACCESS_KEY = "test-secret-access-key"
settings.INBOUND_EMAIL_AWS_REGION = "us-east-2"
organization = make_organization()
alert_receive_channel = make_alert_receive_channel(
@ -494,11 +497,24 @@ def test_amazon_ses_validated_pass(
token="test-token",
)
sns_payload, sns_headers = _sns_inbound_email_payload_and_headers(
sns_payload, sns_headers, content = _sns_inbound_email_setup(
sender_email=SENDER_EMAIL,
to_email=TO_EMAIL,
subject=SUBJECT,
message=MESSAGE,
s3=True,
)
httpretty.register_uri(httpretty.GET, SIGNING_CERT_URL, body=CERTIFICATE)
httpretty.register_uri(
httpretty.HEAD,
"https://test-s3-bucket.s3.us-east-2.amazonaws.com/test-object-key",
responses=[httpretty.Response(body="")],
)
httpretty.register_uri(
httpretty.GET,
"https://test-s3-bucket.s3.us-east-2.amazonaws.com/test-object-key",
responses=[httpretty.Response(body=content)],
)
client = APIClient()
@ -525,6 +541,100 @@ def test_amazon_ses_validated_pass(
received_at=ANY,
)
assert len(httpretty.latest_requests()) == 3
assert (httpretty.latest_requests()[0].method, httpretty.latest_requests()[0].path) == (
"GET",
"/SimpleNotificationService-example.pem",
)
assert (httpretty.latest_requests()[1].method, httpretty.latest_requests()[1].path) == ("HEAD", "/test-object-key")
assert (httpretty.latest_requests()[2].method, httpretty.latest_requests()[2].path) == ("GET", "/test-object-key")
@patch("requests.get", return_value=Mock(content=CERTIFICATE))
@patch.object(create_alert, "delay")
@pytest.mark.django_db
def test_amazon_ses_validated_pass_html(
    mock_create_alert, mock_requests_get, settings, make_organization, make_alert_receive_channel
):
    """A text/html-only email posted through the validated SES webhook creates an alert with a plaintext body."""
    settings.INBOUND_EMAIL_ESP = "amazon_ses_validated,mailgun"
    settings.INBOUND_EMAIL_DOMAIN = "inbound.example.com"
    settings.INBOUND_EMAIL_WEBHOOK_SECRET = "secret"
    settings.INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN = AMAZON_SNS_TOPIC_ARN

    alert_receive_channel = make_alert_receive_channel(
        make_organization(),
        integration=AlertReceiveChannel.INTEGRATION_INBOUND_EMAIL,
        token="test-token",
    )

    # Input covers a title, heading, consecutive <br>, inline tags, <hr>, a link, a list and a table.
    html_message = """\
<html>
<title>title</title>
<body>
<div>
<h1>h1</h1>
<br><br><br>
<p>p<b>b</b><i>i</i> <span>span</span></p> <p>new line</p> <hr>
<a href="https://example.com">link</a>
<ul>
<li>li1</li>
<li>li2</li>
</ul>
<table>
<tr>
<td>td1</td>
<td>td2</td>
</tr>
</table>
</div>
</body>
</html>
"""
    # Expected rendering, one entry per output line (no trailing newline).
    plaintext_message = "\n".join(
        [
            "title",
            "h1",
            "pbi span",
            "new line",
            "--------------------------------",
            "link (https://example.com)",
            "* li1",
            "* li2",
            "td1",
            "td2",
        ]
    )

    sns_payload, sns_headers, _ = _sns_inbound_email_setup(
        sender_email=SENDER_EMAIL,
        to_email=TO_EMAIL,
        subject=SUBJECT,
        message=html_message,
        content_type="text/html",
    )

    webhook_url = reverse("integrations:inbound_email_webhook")
    response = APIClient().post(
        webhook_url,
        data=sns_payload,
        headers=sns_headers,
        format="json",
    )

    assert response.status_code == status.HTTP_200_OK

    expected_raw_request_data = {
        "subject": SUBJECT,
        "message": plaintext_message,
        "sender": SENDER_EMAIL,
    }
    mock_create_alert.assert_called_once_with(
        title=SUBJECT,
        message=plaintext_message,
        alert_receive_channel_pk=alert_receive_channel.pk,
        image_url=None,
        link_to_upstream_details=None,
        integration_unique_data=None,
        raw_request_data=expected_raw_request_data,
        received_at=ANY,
    )
    # the SNS signing certificate is fetched exactly once
    mock_requests_get.assert_called_once_with(SIGNING_CERT_URL, timeout=5)
@ -546,7 +656,7 @@ def test_amazon_ses_validated_fail_wrong_sns_topic_arn(
token="test-token",
)
sns_payload, sns_headers = _sns_inbound_email_payload_and_headers(
sns_payload, sns_headers, _ = _sns_inbound_email_setup(
sender_email=SENDER_EMAIL,
to_email=TO_EMAIL,
subject=SUBJECT,
@ -584,7 +694,7 @@ def test_amazon_ses_validated_fail_wrong_signature(
token="test-token",
)
sns_payload, sns_headers = _sns_inbound_email_payload_and_headers(
sns_payload, sns_headers, _ = _sns_inbound_email_setup(
sender_email=SENDER_EMAIL,
to_email=TO_EMAIL,
subject=SUBJECT,
@ -622,7 +732,7 @@ def test_amazon_ses_validated_fail_cant_download_certificate(
token="test-token",
)
sns_payload, sns_headers = _sns_inbound_email_payload_and_headers(
sns_payload, sns_headers, _ = _sns_inbound_email_setup(
sender_email=SENDER_EMAIL,
to_email=TO_EMAIL,
subject=SUBJECT,
@ -656,7 +766,7 @@ def test_amazon_ses_validated_caches_certificate(
token="test-token",
)
sns_payload, sns_headers = _sns_inbound_email_payload_and_headers(
sns_payload, sns_headers, _ = _sns_inbound_email_setup(
sender_email=SENDER_EMAIL,
to_email=TO_EMAIL,
subject=SUBJECT,

View file

@ -868,6 +868,9 @@ INBOUND_EMAIL_ESP = os.getenv("INBOUND_EMAIL_ESP")
INBOUND_EMAIL_DOMAIN = os.getenv("INBOUND_EMAIL_DOMAIN")
INBOUND_EMAIL_WEBHOOK_SECRET = os.getenv("INBOUND_EMAIL_WEBHOOK_SECRET")
INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN = os.getenv("INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN")
# AWS credentials and region for inbound email; AmazonSESValidatedInboundWebhookView passes them
# as session parameters so it can download emails from S3. Each is None when its env variable is unset.
INBOUND_EMAIL_AWS_ACCESS_KEY_ID = os.getenv("INBOUND_EMAIL_AWS_ACCESS_KEY_ID")
INBOUND_EMAIL_AWS_SECRET_ACCESS_KEY = os.getenv("INBOUND_EMAIL_AWS_SECRET_ACCESS_KEY")
INBOUND_EMAIL_AWS_REGION = os.getenv("INBOUND_EMAIL_AWS_REGION")
INSTALLED_ONCALL_INTEGRATIONS = [
# Featured