{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Example of Page-wise Aggregation" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# OpenLA is imported under the alias `la`, which the rest of the notebook uses.\n", "import OpenLA as la" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "course_info, event_stream = la.start_analysis(files_dir=\"dataset_sample\", course_id=\"A\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Before conversion" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useridcontentsidoperationnamepagenomarkermemo_lengthdevicecodeeventtime
0A_U1C1PREV10NaN0tablet2018-04-09 10:57:15
1A_U1C1PREV9NaN0tablet2018-04-09 11:00:59
2A_U1C1PREV8NaN0tablet2018-04-09 11:03:31
3A_U1C1PREV30NaN0tablet2018-04-10 10:14:12
4A_U1C1PREV29NaN0tablet2018-04-10 10:27:24
...........................
263279A_U99C8NEXT3NaN0pc2018-06-05 16:16:18
263280A_U99C8ADD MARKER4difficult0pc2018-06-05 16:18:34
263281A_U99C8NEXT4NaN0pc2018-06-05 16:19:24
263282A_U99C8NEXT5NaN0pc2018-06-05 16:20:45
263283A_U99C8PREV6NaN0pc2018-06-05 16:21:03
\n", "

263284 rows × 8 columns

\n", "
" ], "text/plain": [ " userid contentsid operationname pageno marker memo_length \\\n", "0 A_U1 C1 PREV 10 NaN 0 \n", "1 A_U1 C1 PREV 9 NaN 0 \n", "2 A_U1 C1 PREV 8 NaN 0 \n", "3 A_U1 C1 PREV 30 NaN 0 \n", "4 A_U1 C1 PREV 29 NaN 0 \n", "... ... ... ... ... ... ... \n", "263279 A_U99 C8 NEXT 3 NaN 0 \n", "263280 A_U99 C8 ADD MARKER 4 difficult 0 \n", "263281 A_U99 C8 NEXT 4 NaN 0 \n", "263282 A_U99 C8 NEXT 5 NaN 0 \n", "263283 A_U99 C8 PREV 6 NaN 0 \n", "\n", " devicecode eventtime \n", "0 tablet 2018-04-09 10:57:15 \n", "1 tablet 2018-04-09 11:00:59 \n", "2 tablet 2018-04-09 11:03:31 \n", "3 tablet 2018-04-10 10:14:12 \n", "4 tablet 2018-04-10 10:27:24 \n", "... ... ... \n", "263279 pc 2018-06-05 16:16:18 \n", "263280 pc 2018-06-05 16:18:34 \n", "263281 pc 2018-06-05 16:19:24 \n", "263282 pc 2018-06-05 16:20:45 \n", "263283 pc 2018-06-05 16:21:03 \n", "\n", "[263284 rows x 8 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "event_stream.df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Behavior in each page (the total staying seconds and operation count in each page)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "behavior_each_page = la.convert_into_page_wise(event_stream=event_stream,\n", " user_id=course_info.user_id()[:5],\n", " contents_id=None, \n", " invalid_seconds=5,\n", " timeout_seconds=20*60,\n", " count_operation=True,\n", " operation_name=[\"OPEN\", \"CLOSE\", \"ADD MARKER\", \"ADD MEMO\", \"ADD BOOKMARK\"],\n", " separate_marker_type=False)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useridcontentsidpagenonum_visitsaverage_reading_secondsreading_secondsOPENCLOSEADD MARKERADD MEMOADD BOOKMARK
0A_U1C1112148.416667178180000
1A_U1C128240.500000192401200
2A_U1C13593.40000046700000
3A_U1C14674.83333344900000
4A_U1C15687.83333352700000
....................................
1734A_U102C879646.16666727700000
1735A_U102C880644.16666726500000
1736A_U102C881428.25000011300000
1737A_U102C8826143.50000086100000
1738A_U102C8834100.00000040000000
\n", "

1739 rows × 11 columns

\n", "
" ], "text/plain": [ " userid contentsid pageno num_visits average_reading_seconds \\\n", "0 A_U1 C1 1 12 148.416667 \n", "1 A_U1 C1 2 8 240.500000 \n", "2 A_U1 C1 3 5 93.400000 \n", "3 A_U1 C1 4 6 74.833333 \n", "4 A_U1 C1 5 6 87.833333 \n", "... ... ... ... ... ... \n", "1734 A_U102 C8 79 6 46.166667 \n", "1735 A_U102 C8 80 6 44.166667 \n", "1736 A_U102 C8 81 4 28.250000 \n", "1737 A_U102 C8 82 6 143.500000 \n", "1738 A_U102 C8 83 4 100.000000 \n", "\n", " reading_seconds OPEN CLOSE ADD MARKER ADD MEMO ADD BOOKMARK \n", "0 1781 8 0 0 0 0 \n", "1 1924 0 1 2 0 0 \n", "2 467 0 0 0 0 0 \n", "3 449 0 0 0 0 0 \n", "4 527 0 0 0 0 0 \n", "... ... ... ... ... ... ... \n", "1734 277 0 0 0 0 0 \n", "1735 265 0 0 0 0 0 \n", "1736 113 0 0 0 0 0 \n", "1737 861 0 0 0 0 0 \n", "1738 400 0 0 0 0 0 \n", "\n", "[1739 rows x 11 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "behavior_each_page.df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Save the data to CSV file" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "behavior_each_page.to_csv(save_file=\"data.csv\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Aggregate information" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "5" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of users in the data\n", "behavior_each_page.num_users()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['A_U1', 'A_U10', 'A_U100', 'A_U101', 'A_U102']" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# User ids in the data\n", "behavior_each_page.user_id()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['C1', 'C2', 'C3', 
'C4', 'C5', 'C7', 'C8', 'C6']" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Contents ids in the data\n", "behavior_each_page.contents_id()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['OPEN', 'CLOSE', 'ADD MARKER', 'ADD MEMO', 'ADD BOOKMARK']" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Operation names in the data\n", "behavior_each_page.operation_name()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of selected operation logs\n", "behavior_each_page.operation_count(operation_name=\"ADD MARKER\", \n", " user_id=\"A_U1\", \n", " contents_id=\"C1\")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'ADD MARKER': 2, 'ADD BOOKMARK': 3}" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "behavior_each_page.operation_count(operation_name=[\"ADD MARKER\", \"ADD BOOKMARK\"], \n", " user_id=\"A_U1\", \n", " contents_id=\"C1\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "67" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of unique pages in the data\n", "behavior_each_page.num_unique_pages(user_id=\"A_U1\", contents_id=\"C1\")" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "42748" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Total reading seconds in the data\n", "behavior_each_page.reading_seconds(user_id=\"A_U1\", contents_id=\"C1\")" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": 
[ { "data": { "text/plain": [ "42748" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Total reading time in the data (seconds)\n", "behavior_each_page.reading_time(time_unit=\"seconds\", user_id=\"A_U1\", contents_id=\"C1\")" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "712.4666666666667" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Total reading time in the data (minutes)\n", "behavior_each_page.reading_time(time_unit=\"minutes\", user_id=\"A_U1\", contents_id=\"C1\")" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "11.874444444444444" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Total reading time in the data (hours)\n", "behavior_each_page.reading_time(time_unit=\"hours\", user_id=\"A_U1\", contents_id=\"C1\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Visualize information\n", "The average operation count and average reading time (in minutes) on each page of contents `C6`, taken over all users in the data." ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(,\n", " )" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "la.visualize_behavior_in_pages(behavior_each_page, \n", " user_id=None,\n", " contents_id=\"C6\",\n", " is_plot_operation=True,\n", " is_plot_reading_time=True,\n", " operation_name=None,\n", " reading_time_basis=\"minutes\",\n", " calculate_type=\"average\",\n", " operation_bar_colors=None,\n", " reading_time_color=\"brown\",\n", " figsize=(20, 10),\n", " fontsize=18,\n", " save_file=None)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" }, "pycharm": { "stem_cell": { "cell_type": "raw", "metadata": { "collapsed": false }, "source": [] } } }, "nbformat": 4, "nbformat_minor": 4 }