Inbound email: download from S3 + convert HTML to plaintext (#5348)

# What this PR does

* Allow `AmazonSESValidatedInboundWebhookView` to download emails from S3,
using AWS credentials supplied via environment variables
* Convert HTML to plaintext when there's only `text/html` available

## Which issue(s) this PR closes

Related to https://github.com/grafana/oncall-private/issues/2905

## Checklist

- [x] Unit, integration, and e2e (if applicable) tests updated
- [x] Documentation added (or `pr:no public docs` PR label added if not
required)
- [x] Added the relevant release notes label (see labels prefixed w/
`release:`). These labels dictate how your PR will
    show up in the autogenerated release notes.
This commit is contained in:
Vadim Stepanov 2024-12-18 16:35:44 +00:00 committed by GitHub
parent 0694fe5572
commit c36761e345
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 245 additions and 31 deletions

View file

@ -7,6 +7,8 @@ from anymail.exceptions import AnymailAPIError, AnymailInvalidAddress, AnymailWe
from anymail.inbound import AnymailInboundMessage from anymail.inbound import AnymailInboundMessage
from anymail.signals import AnymailInboundEvent from anymail.signals import AnymailInboundEvent
from anymail.webhooks import amazon_ses, mailgun, mailjet, mandrill, postal, postmark, sendgrid, sparkpost from anymail.webhooks import amazon_ses, mailgun, mailjet, mandrill, postal, postmark, sendgrid, sparkpost
from bs4 import BeautifulSoup
from django.conf import settings
from django.http import HttpResponse, HttpResponseNotAllowed from django.http import HttpResponse, HttpResponseNotAllowed
from django.utils import timezone from django.utils import timezone
from rest_framework import status from rest_framework import status
@ -25,6 +27,15 @@ class AmazonSESValidatedInboundWebhookView(amazon_ses.AmazonSESInboundWebhookVie
# disable "Your Anymail webhooks are insecure and open to anyone on the web." warning # disable "Your Anymail webhooks are insecure and open to anyone on the web." warning
warn_if_no_basic_auth = False warn_if_no_basic_auth = False
def __init__(self):
super().__init__(
session_params={
"aws_access_key_id": settings.INBOUND_EMAIL_AWS_ACCESS_KEY_ID,
"aws_secret_access_key": settings.INBOUND_EMAIL_AWS_SECRET_ACCESS_KEY,
"region_name": settings.INBOUND_EMAIL_AWS_REGION,
},
)
def validate_request(self, request): def validate_request(self, request):
"""Add SNS message validation to Amazon SES inbound webhook view, which is not implemented in Anymail.""" """Add SNS message validation to Amazon SES inbound webhook view, which is not implemented in Anymail."""
if not validate_amazon_sns_message(self._parse_sns_message(request)): if not validate_amazon_sns_message(self._parse_sns_message(request)):
@ -74,11 +85,10 @@ class InboundEmailWebhookView(AlertChannelDefiningMixin, APIView):
if request.method.lower() == "head": if request.method.lower() == "head":
return HttpResponse(status=status.HTTP_200_OK) return HttpResponse(status=status.HTTP_200_OK)
integration_token = self.get_integration_token_from_request(request) if self.integration_token is None:
if integration_token is None:
return HttpResponse(status=status.HTTP_400_BAD_REQUEST) return HttpResponse(status=status.HTTP_400_BAD_REQUEST)
request.inbound_email_integration_token = integration_token # used in RequestTimeLoggingMiddleware request.inbound_email_integration_token = self.integration_token # used in RequestTimeLoggingMiddleware
return super().dispatch(request, alert_channel_key=integration_token) return super().dispatch(request, alert_channel_key=self.integration_token)
def post(self, request): def post(self, request):
payload = self.get_alert_payload_from_email_message(self.message) payload = self.get_alert_payload_from_email_message(self.message)
@ -94,7 +104,8 @@ class InboundEmailWebhookView(AlertChannelDefiningMixin, APIView):
) )
return Response("OK", status=status.HTTP_200_OK) return Response("OK", status=status.HTTP_200_OK)
def get_integration_token_from_request(self, request) -> Optional[str]: @cached_property
def integration_token(self) -> Optional[str]:
if not self.message: if not self.message:
return None return None
# First try envelope_recipient field. # First try envelope_recipient field.
@ -151,7 +162,8 @@ class InboundEmailWebhookView(AlertChannelDefiningMixin, APIView):
logger.error("Failed to parse inbound email message") logger.error("Failed to parse inbound email message")
return None return None
def check_inbound_email_settings_set(self): @staticmethod
def check_inbound_email_settings_set():
""" """
Guard method that checks whether the INBOUND_EMAIL settings are present. Guard method that checks whether the INBOUND_EMAIL settings are present.
Returns InternalServerError if not. Returns InternalServerError if not.
@ -167,16 +179,105 @@ class InboundEmailWebhookView(AlertChannelDefiningMixin, APIView):
logger.error("InboundEmailWebhookView: INBOUND_EMAIL_DOMAIN env variable must be set.") logger.error("InboundEmailWebhookView: INBOUND_EMAIL_DOMAIN env variable must be set.")
return HttpResponse(status=status.HTTP_500_INTERNAL_SERVER_ERROR) return HttpResponse(status=status.HTTP_500_INTERNAL_SERVER_ERROR)
def get_alert_payload_from_email_message(self, email: AnymailInboundMessage) -> EmailAlertPayload: @classmethod
subject = email.subject or "" def get_alert_payload_from_email_message(cls, email: AnymailInboundMessage) -> EmailAlertPayload:
subject = subject.strip() if email.text:
message = email.text or "" message = email.text.strip()
message = message.strip() elif email.html:
sender = self.get_sender_from_email_message(email) message = cls.html_to_plaintext(email.html)
else:
message = ""
return {"subject": subject, "message": message, "sender": sender} return {
"subject": email.subject.strip() if email.subject else "",
"message": message,
"sender": cls.get_sender_from_email_message(email),
}
def get_sender_from_email_message(self, email: AnymailInboundMessage) -> str: @staticmethod
def html_to_plaintext(html: str) -> str:
"""
Converts HTML to plain text. Renders links as "text (href)" and removes any empty lines.
Converting HTML to plaintext is a non-trivial task, so this method may not work perfectly for all cases.
"""
soup = BeautifulSoup(html, "html.parser")
# Browsers typically render these elements on their own line.
# There is no single official HTML5 list for this, so we go with HTML tags that render as
# display: block, display: list-item, display: table, display: table-row by default according to the HTML standard:
# https://html.spec.whatwg.org/multipage/rendering.html
newline_tags = [
"address",
"article",
"aside",
"blockquote",
"body",
"center",
"dd",
"details",
"dialog",
"dir",
"div",
"dl",
"dt",
"fieldset",
"figcaption",
"figure",
"footer",
"form",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"header",
"hgroup",
"hr",
"html",
"legend",
"li",
"listing",
"main",
"menu",
"nav",
"ol",
"p",
"plaintext",
"pre",
"search",
"section",
"summary",
"table",
"tr",
"ul",
"xmp",
]
# Insert a newline before and after each block-level element
for tag in soup.find_all(newline_tags):
tag.insert_before("\n")
tag.insert_after("\n")
# <br> tags are also typically rendered as newlines
for br in soup.find_all("br"):
br.replace_with("\n")
# example: "<a href="https://example.com">example</a>" -> "example (https://example.com)"
for a in soup.find_all("a"):
if href := a.get("href"):
a.append(f" ({href})")
for li in soup.find_all("li"):
li.insert_before("* ")
for hr in soup.find_all("hr"):
hr.replace_with("-" * 32)
# remove empty lines
return "\n".join(line.strip() for line in soup.get_text().splitlines() if line.strip())
@staticmethod
def get_sender_from_email_message(email: AnymailInboundMessage) -> str:
try: try:
if isinstance(email.from_email, list): if isinstance(email.from_email, list):
sender = email.from_email[0].addr_spec sender = email.from_email[0].addr_spec

View file

@ -6,6 +6,7 @@ from base64 import b64encode
from textwrap import dedent from textwrap import dedent
from unittest.mock import ANY, Mock, patch from unittest.mock import ANY, Mock, patch
import httpretty
import pytest import pytest
from anymail.inbound import AnymailInboundMessage from anymail.inbound import AnymailInboundMessage
from cryptography import x509 from cryptography import x509
@ -54,13 +55,14 @@ SUBJECT = "Test email"
MESSAGE = "This is a test email message body." MESSAGE = "This is a test email message body."
def _sns_inbound_email_payload_and_headers(sender_email, to_email, subject, message): def _sns_inbound_email_setup(sender_email, to_email, subject, message, content_type="text/plain", s3=False):
content = ( content = (
f"From: Sender Name <{sender_email}>\n" f"From: Sender Name <{sender_email}>\n"
f"To: {to_email}\n" f"To: {to_email}\n"
f"Subject: {subject}\n" f"Subject: {subject}\n"
"Date: Tue, 5 Nov 2024 16:05:39 +0000\n" "Date: Tue, 5 Nov 2024 16:05:39 +0000\n"
"Message-ID: <example-message-id@mail.example.com>\n\n" "Message-ID: <example-message-id@mail.example.com>\n"
f"Content-Type: {content_type}\n\n"
f"{message}\r\n" f"{message}\r\n"
) )
@ -130,7 +132,7 @@ def _sns_inbound_email_payload_and_headers(sender_email, to_email, subject, mess
{"name": "To", "value": to_email}, {"name": "To", "value": to_email},
{ {
"name": "Content-Type", "name": "Content-Type",
"value": 'multipart/alternative; boundary="00000000000036b9f706262c9312"', "value": f"{content_type}",
}, },
], ],
"commonHeaders": { "commonHeaders": {
@ -152,12 +154,12 @@ def _sns_inbound_email_payload_and_headers(sender_email, to_email, subject, mess
"dkimVerdict": {"status": "PASS"}, "dkimVerdict": {"status": "PASS"},
"dmarcVerdict": {"status": "PASS"}, "dmarcVerdict": {"status": "PASS"},
"action": { "action": {
"type": "SNS", "type": "S3" if s3 else "SNS",
"topicArn": "arn:aws:sns:us-east-2:123456789012:test", "topicArn": "arn:aws:sns:us-east-2:123456789012:test",
"encoding": "BASE64", **({"bucketName": "test-s3-bucket", "objectKey": "test-object-key"} if s3 else {"encoding": "BASE64"}),
}, },
}, },
"content": b64encode(content.encode()).decode(), **({} if s3 else {"content": b64encode(content.encode()).decode()}),
} }
payload = { payload = {
@ -189,7 +191,7 @@ def _sns_inbound_email_payload_and_headers(sender_email, to_email, subject, mess
"X-Amz-Sns-Message-Type": "Notification", "X-Amz-Sns-Message-Type": "Notification",
"X-Amz-Sns-Message-Id": "example-message-id-1234", "X-Amz-Sns-Message-Id": "example-message-id-1234",
} }
return payload, headers return payload, headers, content
def _mailgun_inbound_email_payload(sender_email, to_email, subject, message): def _mailgun_inbound_email_payload(sender_email, to_email, subject, message):
@ -444,7 +446,7 @@ def test_amazon_ses_pass(create_alert_mock, settings, make_organization, make_al
token="test-token", token="test-token",
) )
sns_payload, sns_headers = _sns_inbound_email_payload_and_headers( sns_payload, sns_headers, _ = _sns_inbound_email_setup(
sender_email=SENDER_EMAIL, sender_email=SENDER_EMAIL,
to_email=TO_EMAIL, to_email=TO_EMAIL,
subject=SUBJECT, subject=SUBJECT,
@ -476,16 +478,17 @@ def test_amazon_ses_pass(create_alert_mock, settings, make_organization, make_al
) )
@patch("requests.get", return_value=Mock(content=CERTIFICATE))
@patch.object(create_alert, "delay") @patch.object(create_alert, "delay")
@httpretty.activate(verbose=True, allow_net_connect=True)
@pytest.mark.django_db @pytest.mark.django_db
def test_amazon_ses_validated_pass( def test_amazon_ses_validated_s3_pass(mock_create_alert, settings, make_organization, make_alert_receive_channel):
mock_create_alert, mock_requests_get, settings, make_organization, make_alert_receive_channel
):
settings.INBOUND_EMAIL_ESP = "amazon_ses_validated,mailgun" settings.INBOUND_EMAIL_ESP = "amazon_ses_validated,mailgun"
settings.INBOUND_EMAIL_DOMAIN = "inbound.example.com" settings.INBOUND_EMAIL_DOMAIN = "inbound.example.com"
settings.INBOUND_EMAIL_WEBHOOK_SECRET = "secret" settings.INBOUND_EMAIL_WEBHOOK_SECRET = "secret"
settings.INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN = AMAZON_SNS_TOPIC_ARN settings.INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN = AMAZON_SNS_TOPIC_ARN
settings.INBOUND_EMAIL_AWS_ACCESS_KEY_ID = "test-access-key-id"
settings.INBOUND_EMAIL_AWS_SECRET_ACCESS_KEY = "test-secret-access-key"
settings.INBOUND_EMAIL_AWS_REGION = "us-east-2"
organization = make_organization() organization = make_organization()
alert_receive_channel = make_alert_receive_channel( alert_receive_channel = make_alert_receive_channel(
@ -494,11 +497,24 @@ def test_amazon_ses_validated_pass(
token="test-token", token="test-token",
) )
sns_payload, sns_headers = _sns_inbound_email_payload_and_headers( sns_payload, sns_headers, content = _sns_inbound_email_setup(
sender_email=SENDER_EMAIL, sender_email=SENDER_EMAIL,
to_email=TO_EMAIL, to_email=TO_EMAIL,
subject=SUBJECT, subject=SUBJECT,
message=MESSAGE, message=MESSAGE,
s3=True,
)
httpretty.register_uri(httpretty.GET, SIGNING_CERT_URL, body=CERTIFICATE)
httpretty.register_uri(
httpretty.HEAD,
"https://test-s3-bucket.s3.us-east-2.amazonaws.com/test-object-key",
responses=[httpretty.Response(body="")],
)
httpretty.register_uri(
httpretty.GET,
"https://test-s3-bucket.s3.us-east-2.amazonaws.com/test-object-key",
responses=[httpretty.Response(body=content)],
) )
client = APIClient() client = APIClient()
@ -525,6 +541,100 @@ def test_amazon_ses_validated_pass(
received_at=ANY, received_at=ANY,
) )
assert len(httpretty.latest_requests()) == 3
assert (httpretty.latest_requests()[0].method, httpretty.latest_requests()[0].path) == (
"GET",
"/SimpleNotificationService-example.pem",
)
assert (httpretty.latest_requests()[1].method, httpretty.latest_requests()[1].path) == ("HEAD", "/test-object-key")
assert (httpretty.latest_requests()[2].method, httpretty.latest_requests()[2].path) == ("GET", "/test-object-key")
@patch("requests.get", return_value=Mock(content=CERTIFICATE))
@patch.object(create_alert, "delay")
@pytest.mark.django_db
def test_amazon_ses_validated_pass_html(
mock_create_alert, mock_requests_get, settings, make_organization, make_alert_receive_channel
):
settings.INBOUND_EMAIL_ESP = "amazon_ses_validated,mailgun"
settings.INBOUND_EMAIL_DOMAIN = "inbound.example.com"
settings.INBOUND_EMAIL_WEBHOOK_SECRET = "secret"
settings.INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN = AMAZON_SNS_TOPIC_ARN
organization = make_organization()
alert_receive_channel = make_alert_receive_channel(
organization,
integration=AlertReceiveChannel.INTEGRATION_INBOUND_EMAIL,
token="test-token",
)
html_message = """\
<html>
<title>title</title>
<body>
<div>
<h1>h1</h1>
<br><br><br>
<p>p<b>b</b><i>i</i> <span>span</span></p> <p>new line</p> <hr>
<a href="https://example.com">link</a>
<ul>
<li>li1</li>
<li>li2</li>
</ul>
<table>
<tr>
<td>td1</td>
<td>td2</td>
</tr>
</table>
</div>
</body>
</html>
"""
plaintext_message = (
"title\n"
"h1\n"
"pbi span\n"
"new line\n"
"--------------------------------\n"
"link (https://example.com)\n"
"* li1\n"
"* li2\n"
"td1\n"
"td2"
)
sns_payload, sns_headers, _ = _sns_inbound_email_setup(
sender_email=SENDER_EMAIL,
to_email=TO_EMAIL,
subject=SUBJECT,
message=html_message,
content_type="text/html",
)
client = APIClient()
response = client.post(
reverse("integrations:inbound_email_webhook"),
data=sns_payload,
headers=sns_headers,
format="json",
)
assert response.status_code == status.HTTP_200_OK
mock_create_alert.assert_called_once_with(
title=SUBJECT,
message=plaintext_message,
alert_receive_channel_pk=alert_receive_channel.pk,
image_url=None,
link_to_upstream_details=None,
integration_unique_data=None,
raw_request_data={
"subject": SUBJECT,
"message": plaintext_message,
"sender": SENDER_EMAIL,
},
received_at=ANY,
)
mock_requests_get.assert_called_once_with(SIGNING_CERT_URL, timeout=5) mock_requests_get.assert_called_once_with(SIGNING_CERT_URL, timeout=5)
@ -546,7 +656,7 @@ def test_amazon_ses_validated_fail_wrong_sns_topic_arn(
token="test-token", token="test-token",
) )
sns_payload, sns_headers = _sns_inbound_email_payload_and_headers( sns_payload, sns_headers, _ = _sns_inbound_email_setup(
sender_email=SENDER_EMAIL, sender_email=SENDER_EMAIL,
to_email=TO_EMAIL, to_email=TO_EMAIL,
subject=SUBJECT, subject=SUBJECT,
@ -584,7 +694,7 @@ def test_amazon_ses_validated_fail_wrong_signature(
token="test-token", token="test-token",
) )
sns_payload, sns_headers = _sns_inbound_email_payload_and_headers( sns_payload, sns_headers, _ = _sns_inbound_email_setup(
sender_email=SENDER_EMAIL, sender_email=SENDER_EMAIL,
to_email=TO_EMAIL, to_email=TO_EMAIL,
subject=SUBJECT, subject=SUBJECT,
@ -622,7 +732,7 @@ def test_amazon_ses_validated_fail_cant_download_certificate(
token="test-token", token="test-token",
) )
sns_payload, sns_headers = _sns_inbound_email_payload_and_headers( sns_payload, sns_headers, _ = _sns_inbound_email_setup(
sender_email=SENDER_EMAIL, sender_email=SENDER_EMAIL,
to_email=TO_EMAIL, to_email=TO_EMAIL,
subject=SUBJECT, subject=SUBJECT,
@ -656,7 +766,7 @@ def test_amazon_ses_validated_caches_certificate(
token="test-token", token="test-token",
) )
sns_payload, sns_headers = _sns_inbound_email_payload_and_headers( sns_payload, sns_headers, _ = _sns_inbound_email_setup(
sender_email=SENDER_EMAIL, sender_email=SENDER_EMAIL,
to_email=TO_EMAIL, to_email=TO_EMAIL,
subject=SUBJECT, subject=SUBJECT,

View file

@ -868,6 +868,9 @@ INBOUND_EMAIL_ESP = os.getenv("INBOUND_EMAIL_ESP")
INBOUND_EMAIL_DOMAIN = os.getenv("INBOUND_EMAIL_DOMAIN") INBOUND_EMAIL_DOMAIN = os.getenv("INBOUND_EMAIL_DOMAIN")
INBOUND_EMAIL_WEBHOOK_SECRET = os.getenv("INBOUND_EMAIL_WEBHOOK_SECRET") INBOUND_EMAIL_WEBHOOK_SECRET = os.getenv("INBOUND_EMAIL_WEBHOOK_SECRET")
INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN = os.getenv("INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN") INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN = os.getenv("INBOUND_EMAIL_AMAZON_SNS_TOPIC_ARN")
INBOUND_EMAIL_AWS_ACCESS_KEY_ID = os.getenv("INBOUND_EMAIL_AWS_ACCESS_KEY_ID")
INBOUND_EMAIL_AWS_SECRET_ACCESS_KEY = os.getenv("INBOUND_EMAIL_AWS_SECRET_ACCESS_KEY")
INBOUND_EMAIL_AWS_REGION = os.getenv("INBOUND_EMAIL_AWS_REGION")
INSTALLED_ONCALL_INTEGRATIONS = [ INSTALLED_ONCALL_INTEGRATIONS = [
# Featured # Featured