Skip to content

Commit 1263e55

Browse files
authored
feat(slack): add option to include bot messages during indexing (#8309) to release v2.12 (#9399)
1 parent d676634 commit 1263e55

File tree

4 files changed

+184
-0
lines changed

4 files changed

+184
-0
lines changed

backend/onyx/connectors/slack/connector.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,18 @@ def default_msg_filter(message: MessageType) -> SlackMessageFilterReason | None:
308308
return None
309309

310310

311+
def _bot_inclusive_msg_filter(
    message: MessageType,
) -> SlackMessageFilterReason | None:
    """Message filter that does not reject bot/app messages.

    The only check applied is on the message's ``subtype``: if it is one of
    ``_DISALLOWED_MSG_SUBTYPES`` the message is rejected with
    ``SlackMessageFilterReason.DISALLOWED``; every other message is accepted
    (``None``). A missing ``subtype`` key is treated as the empty string.
    """
    subtype = message.get("subtype", "")
    is_disallowed = subtype in _DISALLOWED_MSG_SUBTYPES
    return SlackMessageFilterReason.DISALLOWED if is_disallowed else None
321+
322+
311323
def filter_channels(
312324
all_channels: list[ChannelType],
313325
channels_to_connect: list[str] | None,
@@ -654,12 +666,18 @@ def __init__(
654666
# if specified, will treat the specified channel strings as
655667
# regexes, and will only index channels that fully match the regexes
656668
channel_regex_enabled: bool = False,
669+
# if True, messages from bots/apps will be indexed instead of filtered out
670+
include_bot_messages: bool = False,
657671
batch_size: int = INDEX_BATCH_SIZE,
658672
num_threads: int = SLACK_NUM_THREADS,
659673
use_redis: bool = True,
660674
) -> None:
661675
self.channels = channels
662676
self.channel_regex_enabled = channel_regex_enabled
677+
self.include_bot_messages = include_bot_messages
678+
self.msg_filter_func = (
679+
_bot_inclusive_msg_filter if include_bot_messages else default_msg_filter
680+
)
663681
self.batch_size = batch_size
664682
self.num_threads = num_threads
665683
self.client: WebClient | None = None
@@ -839,6 +857,7 @@ def retrieve_all_slim_docs_perm_sync(
839857
client=self.client,
840858
channels=self.channels,
841859
channel_name_regex_enabled=self.channel_regex_enabled,
860+
msg_filter_func=self.msg_filter_func,
842861
callback=callback,
843862
workspace_url=self._workspace_url,
844863
)
@@ -984,6 +1003,7 @@ def _load_from_checkpoint(
9841003
user_cache=self.user_cache,
9851004
seen_thread_ts=seen_thread_ts,
9861005
channel_access=checkpoint.current_channel_access,
1006+
msg_filter_func=self.msg_filter_func,
9871007
)
9881008
)
9891009

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
import pytest
2+
3+
from onyx.connectors.slack.connector import _bot_inclusive_msg_filter
4+
from onyx.connectors.slack.connector import default_msg_filter
5+
from onyx.connectors.slack.connector import SlackConnector
6+
from onyx.connectors.slack.connector import SlackMessageFilterReason
7+
from onyx.connectors.slack.models import MessageType
8+
9+
10+
# -- default_msg_filter tests --
11+
12+
13+
@pytest.mark.parametrize(
    "message,expected_reason",
    [
        # Plain user message passes through untouched.
        pytest.param(
            {"text": "hello", "user": "U123", "ts": "1.0"},
            None,
            id="regular_user_message",
        ),
        # A bot_id marks the message as coming from a bot.
        pytest.param(
            {"text": "automated update", "bot_id": "B123", "ts": "1.0"},
            SlackMessageFilterReason.BOT,
            id="bot_id_message",
        ),
        # An app_id alone is also expected to be classified as BOT.
        pytest.param(
            {"text": "app notification", "app_id": "A123", "ts": "1.0"},
            SlackMessageFilterReason.BOT,
            id="app_id_message",
        ),
        # Both identifiers present: still BOT.
        pytest.param(
            {"text": "bot+app", "bot_id": "B1", "app_id": "A1", "ts": "1.0"},
            SlackMessageFilterReason.BOT,
            id="bot_and_app_id",
        ),
        # The "DanswerBot Testing" bot profile is the expected exception.
        pytest.param(
            {
                "text": "danswer test",
                "bot_id": "B999",
                "bot_profile": {"name": "DanswerBot Testing"},
                "ts": "1.0",
            },
            None,
            id="danswerbot_testing_allowed",
        ),
        # Housekeeping subtypes are expected to be rejected as DISALLOWED.
        pytest.param(
            {"text": "joined", "subtype": "channel_join", "ts": "1.0"},
            SlackMessageFilterReason.DISALLOWED,
            id="channel_join",
        ),
        pytest.param(
            {"text": "left", "subtype": "channel_leave", "ts": "1.0"},
            SlackMessageFilterReason.DISALLOWED,
            id="channel_leave",
        ),
        pytest.param(
            {"text": "pinned", "subtype": "pinned_item", "ts": "1.0"},
            SlackMessageFilterReason.DISALLOWED,
            id="pinned_item",
        ),
        # An explicitly empty subtype behaves like no subtype at all.
        pytest.param(
            {"text": "normal", "subtype": "", "ts": "1.0"},
            None,
            id="empty_subtype",
        ),
    ],
)
def test_default_msg_filter(
    message: MessageType,
    expected_reason: SlackMessageFilterReason | None,
) -> None:
    """default_msg_filter returns the expected filter reason for each case."""
    actual_reason = default_msg_filter(message)
    assert actual_reason == expected_reason
84+
85+
86+
# -- _bot_inclusive_msg_filter tests --
87+
88+
89+
@pytest.mark.parametrize(
    "message,expected_reason",
    [
        # Plain user message passes through untouched.
        pytest.param(
            {"text": "hello", "user": "U123", "ts": "1.0"},
            None,
            id="regular_user_message",
        ),
        # Bot message must NOT be filtered -- the purpose of this filter.
        pytest.param(
            {"text": "automated update", "bot_id": "B123", "ts": "1.0"},
            None,
            id="bot_message_allowed",
        ),
        # App message must NOT be filtered either.
        pytest.param(
            {"text": "app notification", "app_id": "A123", "ts": "1.0"},
            None,
            id="app_message_allowed",
        ),
        # Housekeeping subtypes are still rejected as DISALLOWED.
        pytest.param(
            {"text": "joined", "subtype": "channel_join", "ts": "1.0"},
            SlackMessageFilterReason.DISALLOWED,
            id="channel_join_still_filtered",
        ),
        pytest.param(
            {"text": "left", "subtype": "channel_leave", "ts": "1.0"},
            SlackMessageFilterReason.DISALLOWED,
            id="channel_leave_still_filtered",
        ),
    ],
)
def test_bot_inclusive_msg_filter(
    message: MessageType,
    expected_reason: SlackMessageFilterReason | None,
) -> None:
    """_bot_inclusive_msg_filter returns the expected filter reason for each case."""
    actual_reason = _bot_inclusive_msg_filter(message)
    assert actual_reason == expected_reason
131+
132+
133+
# -- SlackConnector config tests --
134+
135+
136+
def test_default_filter_when_include_bot_messages_false() -> None:
    """A connector built without include_bot_messages uses default_msg_filter."""
    selected_filter = SlackConnector(use_redis=False).msg_filter_func
    # Identity check: the connector must hold the function object itself.
    assert selected_filter is default_msg_filter
140+
141+
142+
def test_bot_inclusive_filter_when_include_bot_messages_true() -> None:
    """A connector built with include_bot_messages=True uses the bot-inclusive filter."""
    selected_filter = SlackConnector(
        include_bot_messages=True, use_redis=False
    ).msg_filter_func
    # Identity check: the connector must hold the function object itself.
    assert selected_filter is _bot_inclusive_msg_filter
146+
147+
148+
def test_include_bot_messages_defaults_to_false() -> None:
    """include_bot_messages is False unless explicitly enabled (backward compatibility)."""
    flag = SlackConnector(use_redis=False).include_bot_messages
    # `is False` rather than falsiness: the attribute must be the bool itself.
    assert flag is False

web/src/components/admin/connectors/ConnectorTitle.tsx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ export const ConnectorTitle = ({
8585
if (typedConnector.connector_specific_config.channel_regex_enabled) {
8686
additionalMetadata.set("Channel Regex Enabled", "True");
8787
}
88+
if (typedConnector.connector_specific_config.include_bot_messages) {
89+
additionalMetadata.set("Include Bot Messages", "True");
90+
}
8891
} else if (connector.source === "zulip") {
8992
const typedConnector = connector as Connector<ZulipConfig>;
9093
additionalMetadata.set(

web/src/lib/connectors/connectors.tsx

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -992,6 +992,15 @@ export const connectorConfigs: Record<
992992
For example, specifying .*-support.* as a "channel" will cause the connector to include any channels with "-support" in the name.`,
993993
optional: true,
994994
},
995+
{
996+
type: "checkbox",
997+
query: "Include bot messages?",
998+
label: "Include Bot Messages",
999+
name: "include_bot_messages",
1000+
description:
1001+
"If enabled, messages from bots and apps will be indexed. Useful for channels that are primarily bot-driven feeds (e.g. CRM updates, automated notes).",
1002+
optional: true,
1003+
},
9951004
],
9961005
},
9971006
slab: {
@@ -1892,6 +1901,7 @@ export interface SlackConfig {
18921901
workspace: string;
18931902
channels?: string[];
18941903
channel_regex_enabled?: boolean;
1904+
include_bot_messages?: boolean;
18951905
}
18961906

18971907
export interface SlabConfig {

0 commit comments

Comments
 (0)