finish script

This commit is contained in:
Ivanov Matvey 2025-03-14 06:25:05 +10:00
parent 3f40f6109d
commit cf0ff35e60
3 changed files with 64 additions and 1 deletions

1
.gitignore vendored
View File

@ -9,3 +9,4 @@ wheels/
# venv # venv
.venv .venv
.env .env
data/

13
main.py
View File

@ -1,6 +1,19 @@
"""Interactive entry point: generate log data or export the aggregated dataset."""
import asyncio
from datetime import date

from src.script import script
from src.dataset import create_dataset


def _read_date(prompt: str) -> date:
    """Prompt repeatedly until the user enters a valid ``YYYY-MM-DD`` date."""
    while True:
        raw = input(prompt).strip()
        try:
            # Unpacking into exactly three names rejects input with too few
            # or too many "-"-separated parts as a ValueError as well.
            year, month, day = map(int, raw.split("-"))
            return date(year, month, day)
        except ValueError:
            print(f"invalid date {raw!r}, expected format YYYY-MM-DD")


def main() -> None:
    """Ask the user which action to run and execute it.

    Option ``1`` runs the data-generation script; option ``2`` asks for an
    inclusive date range and exports the aggregated dataset to CSV.
    """
    user_order = None
    while user_order not in ("1", "2"):
        user_order = input(
            "choose what are you want (1,2)\n 1. generate data\n 2. export data\n"
        ).strip()

    if user_order == "1":
        asyncio.run(script())
        return

    start_date = _read_date("input start date in format YYYY-MM-DD\n")
    end_date = _read_date("input end date in format YYYY-MM-DD\n")
    # A reversed range would silently export an empty file, so ask again.
    while end_date < start_date:
        print("end date must not be earlier than start date")
        end_date = _read_date("input end date in format YYYY-MM-DD\n")

    asyncio.run(create_dataset(start_date, end_date))
    print("data is exported in data/dataset.csv")


if __name__ == "__main__":
    main()

49
src/dataset.py Normal file
View File

@ -0,0 +1,49 @@
import csv
from datetime import date, datetime
from pathlib import Path

from sqlalchemy import text

from src.database_adapter import async_session_maker
# Per-day aggregation over the `log` table. Result columns, in order:
#   date                - log.datetime cast to DATE (the grouping key)
#   new_accounts        - number of rows with action 'REGISTRATION' and
#                         success_response = true
#   statistics_messages - integer percentage: successful 'WRITE_MESSAGE' rows
#                         with user_id IS NULL divided by all 'WRITE_MESSAGE'
#                         rows, times 100; 0 when the day has no successful
#                         'WRITE_MESSAGE' row (which also avoids dividing by 0)
#   all_messages        - number of 'WRITE_MESSAGE' rows
# The {start_*} / {end_*} placeholders are filled with str.format() and fed to
# MAKE_DATE to form an inclusive date range.
# NOTE(review): there is no ORDER BY, so row order is whatever the database
# returns; the range filter sits in HAVING although it only uses the group key.
RAW_SQL = '''
select
CAST(log.datetime AS DATE) as date,
SUM(case when log.action = 'REGISTRATION' and log.success_response = true then 1 else 0 end) as new_accounts,
(
case when SUM(case when log.action = 'WRITE_MESSAGE' and log.success_response = true then 1 else 0 end) = 0 then 0 else cast((
cast(SUM(case when log.action = 'WRITE_MESSAGE' and log.success_response = true and log.user_id is null then 1 else 0 end) as decimal)
/
cast(SUM(case when log.action = 'WRITE_MESSAGE' then 1 else 0 end) as decimal)
)*100 as int) end
) as statistics_messages,
SUM(case when log.action = 'WRITE_MESSAGE' then 1 else 0 end) as all_messages
from log
group by CAST(log.datetime AS DATE)
having MAKE_DATE({start_year}, {start_month}, {start_day}) <= CAST(log.datetime AS DATE) and CAST(log.datetime AS DATE) <= MAKE_DATE({end_year}, {end_month}, {end_day});
'''
async def create_dataset(
    start_date: date,
    end_date: date,
    *,
    output_path: str = "data/dataset.csv",
) -> None:
    """Export per-day log statistics for an inclusive date range to a CSV file.

    Runs ``RAW_SQL`` with the year/month/day of both bounds substituted in and
    writes one CSV row per returned day, preceded by a header row.

    Args:
        start_date: First day of the range (inclusive). Only ``year``,
            ``month`` and ``day`` are read, so a ``datetime`` works as well.
        end_date: Last day of the range (inclusive).
        output_path: Destination file; defaults to the path the original
            implementation hard-coded. Missing parent directories are created.
    """
    fieldnames = ["date", "new_accounts", "statistics_messages", "all_messages"]

    # Only integers taken from date attributes are interpolated into the SQL.
    stmt = text(RAW_SQL.format(
        start_year=start_date.year,
        start_month=start_date.month,
        start_day=start_date.day,
        end_year=end_date.year,
        end_month=end_date.month,
        end_day=end_date.day,
    ))

    # .all() materializes every row, so the session can be released before
    # any file I/O happens.
    async with async_session_maker() as session:
        rows = (await session.execute(stmt)).all()

    # data/ is git-ignored, so it does not exist on a fresh checkout; without
    # this the open() below fails with FileNotFoundError.
    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    with open(target, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(fieldnames)
        # Column order of the query matches fieldnames.
        writer.writerows(
            (record[0], record[1], record[2], record[3]) for record in rows
        )