diff --git a/.gitignore b/.gitignore
index 0f843db..3b72a3c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,4 @@ wheels/
 # venv
 .venv
 .env
+data/
\ No newline at end of file
diff --git a/main.py b/main.py
--- a/main.py
+++ b/main.py
@@ -1,6 +1,30 @@
 import asyncio
+from datetime import date
+
 from src.script import script
+from src.dataset import create_dataset
+
+
+def _ask_date(prompt: str) -> date:
+    """Prompt until the user enters a valid calendar date as YYYY-MM-DD."""
+    while True:
+        raw = input(prompt).strip()
+        try:
+            year, month, day = map(int, raw.split("-"))
+            return date(year, month, day)
+        except ValueError:
+            print("invalid date, expected format YYYY-MM-DD")
 
 
 if __name__ == "__main__":
-    asyncio.run(script())
\ No newline at end of file
+    user_order = None
+    while user_order not in ["1", "2"]:
+        user_order = input("choose what are you want (1,2)\n 1. generate data\n 2. export data\n").strip()
+
+    if user_order == "1":
+        asyncio.run(script())
+    else:
+        start_date = _ask_date("input start date in format YYYY-MM-DD\n")
+        end_date = _ask_date("input end date in format YYYY-MM-DD\n")
+        asyncio.run(create_dataset(start_date, end_date))
+        print("data is exported in data/dataset.csv")
diff --git a/src/dataset.py b/src/dataset.py
new file mode 100644
--- /dev/null
+++ b/src/dataset.py
@@ -0,0 +1,56 @@
+import csv
+from datetime import date
+from pathlib import Path
+
+from sqlalchemy import text
+from src.database_adapter import async_session_maker
+
+DATASET_PATH = Path("data/dataset.csv")
+FIELDNAMES = ["date", "new_accounts", "statistics_messages", "all_messages"]
+
+# Per-day aggregates over the log table. The date range is passed as bound
+# parameters (:start_date, :end_date) rather than formatted into the text.
+RAW_SQL = '''
+select
+CAST(log.datetime AS DATE) as date,
+SUM(case when log.action = 'REGISTRATION' and log.success_response = true then 1 else 0 end) as new_accounts,
+(
+    case when SUM(case when log.action = 'WRITE_MESSAGE' and log.success_response = true then 1 else 0 end) = 0 then 0 else cast((
+        cast(SUM(case when log.action = 'WRITE_MESSAGE' and log.success_response = true and log.user_id is null then 1 else 0 end) as decimal)
+        /
+        cast(SUM(case when log.action = 'WRITE_MESSAGE' then 1 else 0 end) as decimal)
+    )*100 as int) end
+) as statistics_messages,
+SUM(case when log.action = 'WRITE_MESSAGE' then 1 else 0 end) as all_messages
+from log
+group by CAST(log.datetime AS DATE)
+having
+    CAST(:start_date AS DATE) <= CAST(log.datetime AS DATE) and CAST(log.datetime AS DATE) <= CAST(:end_date AS DATE)
+order by CAST(log.datetime AS DATE);
+'''
+
+
+async def create_dataset(start_date: date, end_date: date) -> None:
+    """Export per-day log statistics for an inclusive date range to CSV.
+
+    Runs RAW_SQL with the two dates bound as parameters and writes one row
+    per day to data/dataset.csv (columns listed in FIELDNAMES).
+
+    Args:
+        start_date: First day to include. Only year/month/day are used.
+        end_date: Last day to include. Only year/month/day are used.
+    """
+    # Rebuild plain dates so datetime arguments keep working as before.
+    params = {
+        "start_date": date(start_date.year, start_date.month, start_date.day),
+        "end_date": date(end_date.year, end_date.month, end_date.day),
+    }
+    async with async_session_maker() as session:
+        rows = (await session.execute(text(RAW_SQL), params)).all()
+
+    # data/ is git-ignored, so it is missing in a fresh checkout.
+    DATASET_PATH.parent.mkdir(parents=True, exist_ok=True)
+    with DATASET_PATH.open("w", newline="", encoding="utf-8") as csvfile:
+        writer = csv.writer(csvfile)
+        writer.writerow(FIELDNAMES)
+        writer.writerows(rows)