Skip to content

Commit 9b867a2

Browse files
authored
Merge pull request GerevAI#52 from GerevAI/feature/bookstack-integration
Feature/bookstack integration
2 parents 6bc87e1 + 9d635c5 commit 9b867a2

File tree

10 files changed

+217
-4
lines changed

10 files changed

+217
-4
lines changed

app/data_sources/mattermost.py

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
import logging
2+
from dataclasses import dataclass, asdict
3+
from datetime import datetime
4+
from functools import lru_cache
5+
from typing import Dict, List, Optional
6+
from urllib.parse import urlparse
7+
8+
from mattermostdriver import Driver
9+
10+
from data_source_api.base_data_source import BaseDataSource, ConfigField, HTMLInputType
11+
from data_source_api.basic_document import BasicDocument, DocumentType
12+
from data_source_api.exception import InvalidDataSourceConfig
13+
from data_source_api.utils import parse_with_workers
14+
from indexing_queue import IndexingQueue
15+
16+
logger = logging.getLogger(__name__)
17+
18+
19+
@dataclass
class MattermostChannel:
    """Lightweight record describing one Mattermost channel.

    Instances are built from the ``id``, ``name`` and ``team_id`` keys of
    the channel objects returned by the Mattermost API.
    """

    # Channel identifier; used to build the posts endpoint path.
    id: str
    # Channel name; used as the title/location of indexed messages.
    name: str
    # Identifier of the owning team; an empty string marks a channel
    # that is skipped during indexing.
    team_id: str
24+
25+
26+
@dataclass
class MattermostConfig:
    """Connection settings for a Mattermost server.

    ``url`` is accepted as whatever the user typed (with or without a
    scheme, port or path) and is normalized in ``__post_init__`` so that,
    after construction, ``url`` holds only the host name while ``scheme``
    and ``port`` hold the values taken from the URL (falling back to the
    field defaults when the URL does not specify them).

    Raises:
        ValueError: if the URL cannot be parsed or contains no host name.
    """

    url: str
    token: str
    scheme: Optional[str] = "https"
    port: Optional[int] = 443

    def __post_init__(self):
        try:
            raw_url = (self.url or "").strip()
            # urlparse only recognises a network location after "//".
            # Without it "host" lands in the path and "host:8065" may be
            # read as scheme="host", so scheme-less input is prefixed.
            if "://" not in raw_url and not raw_url.startswith("//"):
                raw_url = f"//{raw_url}"

            parsed_url = urlparse(raw_url)
            # Accessing .port validates it and raises ValueError on junk.
            parsed_port = parsed_url.port
        except Exception as e:
            raise ValueError(f"Invalid Mattermost server URL: {self.url!r}") from e

        if not parsed_url.hostname:
            raise ValueError(f"Mattermost server URL has no host name: {self.url!r}")

        self.url = parsed_url.hostname
        self.scheme = parsed_url.scheme if parsed_url.scheme != "" else self.scheme

        if parsed_port is not None:
            self.port = parsed_port
        elif self.scheme == "http" and self.port == 443:
            # The 443 default only makes sense for https; a plain-http URL
            # without an explicit port means the standard http port.
            self.port = 80
42+
43+
44+
class MattermostDataSource(BaseDataSource):
    """Data source that feeds Mattermost channel messages into the indexing queue.

    Channels are listed through ``/users/me/channels`` and each channel's
    posts are fetched page by page. Consecutive posts by the same author
    are merged into a single ``BasicDocument`` before being fed.
    """

    # Number of accumulated documents that triggers an intermediate feed.
    FEED_BATCH_SIZE = 500

    @staticmethod
    def get_config_fields() -> List[ConfigField]:
        """Return the configuration form fields: server URL and access token."""
        return [
            ConfigField(label="Mattermost Server", name="url", placeholder="https://mattermost.server.com",
                        input_type=HTMLInputType.TEXT),
            ConfigField(label="Access Token", name="token", placeholder="paste-your-access-token-here",
                        input_type=HTMLInputType.PASSWORD),
        ]

    @staticmethod
    def validate_config(config: Dict) -> None:
        """Check ``config`` by parsing it and attempting a login.

        Raises:
            InvalidDataSourceConfig: if parsing the config or logging in
                fails for any reason.
        """
        try:
            parsed_config = MattermostConfig(**config)
            mattermost = Driver(options=asdict(parsed_config))
            mattermost.login()
        except Exception as e:
            raise InvalidDataSourceConfig from e

    def __init__(self, *args, **kwargs):
        """Build the Mattermost driver from ``self._config``.

        NOTE(review): ``self._config`` is presumably set by
        ``BaseDataSource.__init__`` — confirm against the base class.
        """
        super().__init__(*args, **kwargs)
        mattermost_config = MattermostConfig(**self._config)
        self._mattermost = Driver(options=asdict(mattermost_config))
        # Per-instance cache: decorating the method itself with lru_cache
        # would key on ``self`` and keep every instance alive in a
        # class-level cache.
        self._cached_username = lru_cache(maxsize=512)(self._fetch_username)

    def _list_channels(self) -> List[MattermostChannel]:
        """Return the channels reported by ``/users/me/channels``."""
        channels = self._mattermost.channels.client.get("/users/me/channels")
        return [MattermostChannel(id=channel["id"], name=channel["name"], team_id=channel["team_id"])
                for channel in channels]

    def _is_valid_message(self, message: Dict) -> bool:
        """Return True for posts whose ``type`` is the empty string.

        NOTE(review): presumably non-empty types are system posts — confirm.
        """
        return message["type"] == ""

    def _is_valid_channel(self, channel: MattermostChannel) -> bool:
        """Return True for channels that belong to a team (non-empty ``team_id``).

        NOTE(review): presumably direct/group message channels have an
        empty ``team_id`` — confirm.
        """
        return channel.team_id != ""

    def _list_posts_in_channel(self, channel_id: str, page: int) -> Dict:
        """Fetch one page of posts for ``channel_id``.

        The ``since`` parameter is the last index time converted to
        milliseconds. Returns the raw response of the posts endpoint.
        """
        endpoint = f"/channels/{channel_id}/posts"
        params = {
            "since": int(self._last_index_time.timestamp()) * 1000,
            "page": page
        }

        return self._mattermost.channels.client.get(endpoint, params=params)

    def _feed_new_documents(self) -> None:
        """Log in, list the channels and hand them to the worker pool."""
        self._mattermost.login()
        channels = self._list_channels()

        logger.info(f'Found {len(channels)} channels')
        parse_with_workers(self._parse_channel_worker, channels)

    def _parse_channel_worker(self, channels: List[MattermostChannel]):
        """Feed every channel in the chunk given to this worker."""
        for channel in channels:
            self._feed_channel(channel)

    def _get_mattermost_url(self):
        """Return the server base URL as ``scheme://host:port`` from the driver options."""
        options = self._mattermost.options
        return f"{options['scheme']}://{options['url']}:{options['port']}"

    def _get_team_url(self, channel: MattermostChannel):
        """Return the base URL of the team that owns ``channel``."""
        url = self._get_mattermost_url()
        team = self._mattermost.teams.get_team(channel.team_id)
        return f"{url}/{team['name']}"

    def _fetch_username(self, user_id: str):
        """Look up the username of ``user_id`` through the API (uncached)."""
        return self._mattermost.users.get_user(user_id)["username"]

    def _get_mattermost_user(self, user_id: str):
        """Return the username of ``user_id``, cached per data-source instance."""
        return self._cached_username(user_id)

    def _feed_channel(self, channel: MattermostChannel):
        """Fetch the posts of ``channel`` and feed them to the indexing queue.

        Posts are processed oldest-first within each page. Consecutive
        valid posts by the same author are merged into one document; an
        invalid post ends the current run. Documents are fed whenever
        ``FEED_BATCH_SIZE`` of them have accumulated, and once more at
        the end.
        """
        if not self._is_valid_channel(channel):
            return
        logger.info(f'Feeding channel {channel.name}')

        page = 0
        total_fed = 0
        parsed_posts = []
        team_url = self._get_team_url(channel)

        while True:
            posts = self._list_posts_in_channel(channel.id, page)
            post_ids = posts["order"]

            # Document currently being accumulated for the run of posts
            # by one author; reset at the start of every page.
            last_message: Optional[BasicDocument] = None

            # The response order is reversed so merged content reads
            # oldest-first.
            for post_id in reversed(post_ids):
                post = posts["posts"][post_id]

                if not self._is_valid_message(post):
                    # An invalid post breaks the current author run.
                    if last_message is not None:
                        parsed_posts.append(last_message)
                        last_message = None
                    continue

                author = self._get_mattermost_user(post["user_id"])
                content = post["message"]

                if last_message is not None:
                    if last_message.author == author:
                        last_message.content += f"\n{content}"
                        continue

                    parsed_posts.append(last_message)
                    if len(parsed_posts) >= MattermostDataSource.FEED_BATCH_SIZE:
                        total_fed += len(parsed_posts)
                        IndexingQueue.get().feed(docs=parsed_posts)
                        parsed_posts = []

                author_image_url = f"{self._get_mattermost_url()}/api/v4/users/{post['user_id']}/image?_=0"
                timestamp = datetime.fromtimestamp(post["update_at"] / 1000)
                last_message = BasicDocument(
                    id=post_id,
                    data_source_id=self._data_source_id,
                    title=channel.name,
                    content=content,
                    timestamp=timestamp,
                    author=author,
                    author_image_url=author_image_url,
                    location=channel.name,
                    url=f"{team_url}/pl/{post_id}",
                    type=DocumentType.MESSAGE
                )

            if last_message is not None:
                parsed_posts.append(last_message)

            # Stop on the last page; an empty page also ends the loop so a
            # response with no posts can never make it spin forever.
            if posts["prev_post_id"] == "" or not post_ids:
                break
            page += 1

        IndexingQueue.get().feed(docs=parsed_posts)
        total_fed += len(parsed_posts)

        # Report on the channel total, not just the size of the last batch.
        if total_fed > 0:
            logger.info(f"Worker fed {total_fed} documents")

app/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,5 @@ google-auth-oauthlib
1919
oauth2client
2020
mammoth
2121
python-pptx
22-
alembic
22+
alembic
23+
mattermostdriver

app/search_logic.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,10 @@ def _assign_answer_sentence(candidate: Candidate, answer: str):
155155
def _find_answers_in_candidates(candidates: List[Candidate], query: str) -> List[Candidate]:
156156
contexts = [candidate.content for candidate in candidates]
157157
answers = qa_model(question=[query] * len(contexts), context=contexts)
158+
159+
if type(answers) == dict:
160+
answers = [answers]
161+
158162
for candidate, answer in zip(candidates, answers):
159163
_assign_answer_sentence(candidate, answer['answer'])
160164

38.9 KB
Loading

images/everything.png

1.27 KB
Loading

ui/package-lock.json

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ui/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"react": "^18.2.0",
1515
"react-dom": "^18.2.0",
1616
"react-icons": "^4.8.0",
17+
"react-image": "^4.1.0",
1718
"react-modal": "^3.16.1",
1819
"react-scripts": "5.0.1",
1920
"react-select": "^5.7.0",

ui/src/assets/images/user.webp

2.33 KB
Binary file not shown.

ui/src/components/data-source-panel.tsx

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import { ClipLoader } from "react-spinners";
1414
import { toast } from 'react-toastify';
1515
import { api } from "../api";
1616
import { ConfigField, DataSourceType } from "../data-source";
17+
import { colors } from "react-select/dist/declarations/src/theme";
1718

1819

1920
export interface SelectOption {
@@ -195,6 +196,16 @@ export default class DataSourcePanel extends React.Component<DataSourcePanelProp
195196
{
196197
<div className="flex flex-col ">
197198
<div className="bg-[#352C45] py-[26px] px-10 rounded-xl border-[1px] border-[#4e326b]">
199+
{
200+
this.state.selectedDataSource.value === 'mattermost' && (
201+
<span className="flex flex-col leading-9 text-xl text-white">
202+
<span>1. {'Go to your Mattermost -> top-right profile picture -> Profile'}</span>
203+
<span>2. {'Security -> Personal Access Tokens -> Create token -> Name it'}</span>
204+
<span>3. {"Copy the Access Token"}</span>
205+
<span className="text-violet-300/[.75] text-sm"> {"* Personal Access Tokens must be on"} - <a className="inline hover:underline text-violet-400/[.75]" target="_blank" href="https://developers.mattermost.com/integrate/reference/personal-access-token/">Click for more info</a></span>
206+
</span>
207+
)
208+
}
198209
{
199210
this.state.selectedDataSource.value === 'confluence' && (
200211
<span className="flex flex-col leading-9 text-xl text-white">

ui/src/components/search-result.tsx

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11

22
import React from 'react';
3+
import {Img} from 'react-image'
34

45
import BlueFolder from '../assets/images/blue-folder.svg';
56
import GoogleDoc from '../assets/images/google-doc.svg';
67
import Docx from '../assets/images/docx.svg';
78
import Pptx from '../assets/images/pptx.svg';
9+
import DefaultUserImage from '../assets/images/user.webp';
810
import { DataSourceType } from '../data-source';
911

10-
1112
export interface TextPart {
1213
content: string
1314
bold: boolean
@@ -69,7 +70,9 @@ export const SearchResult = (props: SearchResultProps) => {
6970
{props.resultDetails.location} ·&thinsp;
7071
</span>
7172
<span className="flex flex-row items-center">
72-
<img alt="author" className="inline-block ml-2 mr-2 h-4 rounded-xl" src={props.resultDetails.author_image_data ? props.resultDetails.author_image_data : props.resultDetails.author_image_url}></img>
73+
74+
<Img alt="author" className="inline-block ml-2 mr-2 h-4 rounded-xl"
75+
src={[props.resultDetails.author_image_url, props.resultDetails.author_image_data, DefaultUserImage]}></Img>
7376
<span className='capitalize'>{props.resultDetails.author} ·</span>
7477
</span>
7578
<span>
@@ -151,7 +154,7 @@ function getBigIcon(props: SearchResultProps) {
151154
if (onTopImage !== "") {
152155
return (
153156
<div className="mt-2 mr-[10px] drop-shadow-[0_0_25px_rgba(212,179,255,0.15)]">
154-
<img height={"45px"} width={"45px"} className={containingClasses} alt="file-type" src={containingImage}></img>
157+
<Img height={"45px"} width={"45px"} className={containingClasses} alt="file-type" src={[containingImage, DefaultUserImage]}/>
155158
<img alt="file-type" className="company-logo rounded-full p-[3px] h-[24px] w-[24px] absolute -right-[5px] -bottom-[5px] bg-white" src={onTopImage}></img>
156159
</div>
157160
)

0 commit comments

Comments
 (0)