-
Notifications
You must be signed in to change notification settings - Fork 3
/
app.py
180 lines (145 loc) · 4.69 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import os
import re
from typing import Dict, Optional, Union
import uvicorn
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.interval import IntervalTrigger
from dotenv import load_dotenv
from fastapi import FastAPI
from pydantic import BaseModel, validator
from src.pdf_summarization import APIInterface
# Load environment variables via python-dotenv at import time, before any of
# the classes below are instantiated.
load_dotenv()
class SummarizeRequest(BaseModel):
    """
    A class to represent the request of the summarize API,
    from Slack Events API.

    Attributes
    ----------
    token : str
        The ``token`` field of the request body.
    type : str
        The ``type`` field of the request body.
    event : Optional[Dict]
        The event dictionary. When present it must be an ``app_mention``
        event whose ``text`` contains an arXiv ID and whose
        ``client_msg_id`` has not been seen before.
    challenge : Optional[str]
        The ``challenge`` field of the request body, if any.

    Notes
    -----
    The validators below are declared in the order cheap checks first,
    side-effecting check last, so that (assuming pydantic applies field
    validators in declaration order and stops at the first failure -- TODO
    confirm for the installed pydantic version) a message ID is only written
    to ``msg_id.log`` for events that passed the other checks.
    """

    token: str
    type: str
    # Explicit defaults keep both fields optional regardless of how the
    # installed pydantic version treats a bare ``Optional[...]`` annotation.
    event: Optional[Dict] = None
    challenge: Optional[str] = None

    @validator("event")
    def event_text_must_contain_arxiv_id(cls, v: Optional[Dict]) -> Optional[Dict]:
        """
        Validate the event text to contain arXiv ID.

        Parameters
        ----------
        v : Optional[Dict]
            The event dictionary (optional).

        Returns
        -------
        v : Optional[Dict]
            The event dictionary if the text contains arXiv ID,
            or None if no event was given.

        Raises
        ------
        ValueError
            If the text is missing, is not a string,
            or does not contain arXiv ID.
        """
        # nothing to validate when the request carries no event
        if v is None:
            return v

        # a missing or non-string text is reported as a validation error
        # instead of escaping as KeyError / TypeError
        text = v.get("text")

        # check if the text contains arXiv ID (e.g. 2101.00001)
        if not isinstance(text, str) or not re.search(r"\d{4}\.\d{5}", text):
            raise ValueError("The text must contain arXiv ID.")
        return v

    @validator("event")
    def event_type_must_be_app_mention(cls, v: Optional[Dict]) -> Optional[Dict]:
        """
        Validate the event to be app_mention.

        Parameters
        ----------
        v : Optional[Dict]
            The event dictionary (optional).

        Returns
        -------
        v : Optional[Dict]
            The event dictionary if the event is app_mention,
            or None if no event was given.

        Raises
        ------
        ValueError
            If the event is not app_mention (including a missing type).
        """
        # nothing to validate when the request carries no event
        if v is None:
            return v

        # check if the event is app_mention
        if v.get("type") != "app_mention":
            raise ValueError("The event must be app_mention.")
        return v

    @validator("event")
    def event_client_msg_id_must_be_unique(cls, v: Optional[Dict]) -> Optional[Dict]:
        """
        Validate the client_msg_id to be unique.
        If the client_msg_id is unique, save it to the msg_id.log file.
        If duplicated, raise ValueError.

        Parameters
        ----------
        v : Optional[Dict]
            The event dictionary (optional).

        Returns
        -------
        v : Optional[Dict]
            The event dictionary if the client_msg_id is unique,
            or None if no event was given.

        Raises
        ------
        ValueError
            If the client_msg_id is missing or duplicated.
        """
        # nothing to validate when the request carries no event
        if v is None:
            return v

        # without an ID the event cannot be de-duplicated
        raw_msg_id = v.get("client_msg_id")
        if not raw_msg_id:
            raise ValueError("The event must contain client_msg_id.")
        # compare and store the same textual form
        msg_id = str(raw_msg_id)

        # check if the client_msg_id is duplicated
        if os.path.exists("msg_id.log"):
            with open("msg_id.log", "r", encoding="utf-8") as f:
                # stream the log and stop at the first match
                already_seen = any(line.strip() == msg_id for line in f)
            if already_seen:
                raise ValueError("The event client_msg_id is duplicated.")

        # save the event client_msg_id to msg_id.log
        with open("msg_id.log", "a", encoding="utf-8") as f:
            f.write(f"{msg_id}\n")
        return v
class SummarizerAPI:
    """
    A class to create and run the Summarizer API using FastAPI.

    Attributes
    ----------
    app : FastAPI
        The FastAPI application instance.
    api_interface : APIInterface
        The API interface for PDF summarization.
    scheduler : AsyncIOScheduler or None
        The scheduler driving the periodic summary job. None until the
        application's startup handler has run.
    """

    def __init__(self):
        self.app = FastAPI()
        self.api_interface = APIInterface()
        # set by daily_summary() once the application has started
        self.scheduler = None

        self.app.add_api_route(
            "/summarize",
            self.summarize,
            methods=["POST"],
        )
        self.app.add_event_handler("startup", self.daily_summary)

    def run(self, host: str = "0.0.0.0", port: int = 8760):
        """
        Run the FastAPI application using uvicorn.

        Parameters
        ----------
        host : str, optional
            The interface to bind to (default "0.0.0.0").
        port : int, optional
            The port to listen on (default 8760).
        """
        uvicorn.run(self.app, host=host, port=port)

    async def summarize(self, payload: SummarizeRequest) -> Union[str, None]:
        """
        Summarize the given arXiv paper.

        Parameters
        ----------
        payload : SummarizeRequest
            The request object. If it carries a challenge, the challenge is
            returned unchanged; otherwise the arXiv ID found in the event
            text is passed to the summarizer.

        Returns
        -------
        Union[str, None]
            The challenge string if the request contains one, otherwise None.

        Raises
        ------
        Exception
            If extracting the arXiv ID or summarizing fails. The exception
            carries the payload and is chained to the original error.
        """
        if payload.challenge is not None:
            return payload.challenge

        try:
            # the first arXiv-style ID (e.g. 2101.00001) in the event text
            arxiv_id = re.search(r"\d{4}\.\d{5}", payload.event["text"]).group()
            # NOTE(review): this looks like a synchronous call inside an async
            # handler -- confirm it does not block the event loop for long.
            self.api_interface.summarize(arxiv_id)
        except Exception as err:
            # keep the original traceback attached for debugging
            raise Exception(payload) from err
        return None

    async def daily_summary(self) -> None:
        """
        Schedule the daily summary of arXiv papers.

        Registered as the application's startup handler. Creates a
        scheduler that calls ``api_interface.daily_summary`` on a 24-hour
        interval, starts it, and keeps it on ``self.scheduler``.
        """
        scheduler = AsyncIOScheduler()
        scheduler.add_job(self.api_interface.daily_summary, IntervalTrigger(hours=24))
        scheduler.start()
        # keep a reference so the scheduler can be inspected or shut down later
        self.scheduler = scheduler
if __name__ == "__main__":
    # Executed as a script: build the service and start serving.
    SummarizerAPI().run()