From 105bd3842cc405344658f3a9c2659501d91a0de8 Mon Sep 17 00:00:00 2001 From: Matthieu Napoli Date: Wed, 17 Dec 2014 14:01:29 +1300 Subject: [PATCH 1/3] #22 Split the MVC guide into more meaningful categories --- .../Content/Category/DevelopCategory.php | 15 ++++++++--- app/helpers/Redirects.php | 4 +++ docs/{mvc-models.md => apis.md} | 4 +-- docs/{mvc-controllers.md => controllers.md} | 1 - docs/http-request-handling.md | 4 --- docs/mvc-in-piwik.md | 26 ------------------- docs/pages.md | 5 ++-- docs/piwiks-extensibility-points.md | 4 +-- docs/piwiks-reporting-api.md | 2 +- docs/{mvc-views.md => views.md} | 2 -- docs/visualizing-report-data.md | 6 ++--- 11 files changed, 25 insertions(+), 48 deletions(-) rename docs/{mvc-models.md => apis.md} (97%) rename docs/{mvc-controllers.md => controllers.md} (99%) delete mode 100644 docs/mvc-in-piwik.md rename docs/{mvc-views.md => views.md} (98%) diff --git a/app/helpers/Content/Category/DevelopCategory.php b/app/helpers/Content/Category/DevelopCategory.php index 1beed8e81..5b6f85ab8 100644 --- a/app/helpers/Content/Category/DevelopCategory.php +++ b/app/helpers/Content/Category/DevelopCategory.php @@ -31,27 +31,34 @@ public function getItems() ]), new EmptySubCategory('Understanding Piwik', [ new Guide('how-piwik-works'), - new Guide('all-about-analytics-data'), + new Guide('http-request-handling'), new Guide('piwiks-extensibility-points'), ]), - new Guide('mvc-in-piwik'), new EmptySubCategory('Web Interface', [ + new Guide('controllers'), + new Guide('views'), new Guide('pages'), new Guide('menus'), new Guide('widgets'), new Guide('working-with-piwiks-ui'), new Guide('visualizing-report-data'), ]), + new EmptySubCategory('HTTP Reporting API', [ + new Guide('apis'), + new Guide('piwiks-reporting-api'), + ]), new Guide('piwik-on-the-command-line'), + new EmptySubCategory('Archiving and data', [ + new Guide('all-about-analytics-data'), + new Guide('persistence-and-the-mysql-backend'), + ]), new Guide('piwik-configuration'), 
- new Guide('persistence-and-the-mysql-backend'), new EmptySubCategory('Security', [ new Guide('security-in-piwik'), new Guide('permissions'), ]), new Guide('internationalization'), new Guide('tests'), - new Guide('piwiks-reporting-api'), new Guide('scheduled-tasks'), new EmptySubCategory('Piwik Core development', [ new Guide('contributing-to-piwik-core'), diff --git a/app/helpers/Redirects.php b/app/helpers/Redirects.php index ea00b51a4..9b18b4826 100644 --- a/app/helpers/Redirects.php +++ b/app/helpers/Redirects.php @@ -19,6 +19,10 @@ public static function getRedirects() '/api-reference/metadata' => '/api-reference/reporting-api-metadata', '/api-reference/segmentation' => '/api-reference/reporting-api-segmentation', '/guides/automated-tests' => '/guides/tests', + '/guides/mvc-in-piwik' => '/guides/controllers', + '/guides/mvc-models' => '/guides/apis', + '/guides/mvc-views' => '/guides/views', + '/guides/mvc-controllers' => '/guides/controllers', ]; } } diff --git a/docs/mvc-models.md b/docs/apis.md similarity index 97% rename from docs/mvc-models.md rename to docs/apis.md index f1b46c7c6..29817277c 100644 --- a/docs/mvc-models.md +++ b/docs/apis.md @@ -1,9 +1,7 @@ --- category: Develop -previous: http-request-handling -next: mvc-views --- -# Piwik APIs (Models) +# Piwik APIs Piwik APIs serve two purposes: they serve the data used in controller methods and they [automatically expose plugin functionality through an HTTP API](/guides/piwiks-reporting-api). 
diff --git a/docs/mvc-controllers.md b/docs/controllers.md similarity index 99% rename from docs/mvc-controllers.md rename to docs/controllers.md index 117397574..49597a86d 100644 --- a/docs/mvc-controllers.md +++ b/docs/controllers.md @@ -1,6 +1,5 @@ --- category: Develop -previous: mvc-views --- # Controllers diff --git a/docs/http-request-handling.md b/docs/http-request-handling.md index 6d825c80d..c3c1e0bdd 100644 --- a/docs/http-request-handling.md +++ b/docs/http-request-handling.md @@ -1,12 +1,8 @@ --- category: Develop -previous: mvc-in-piwik -next: mvc-models --- # How Piwik Handles HTTP Requests -Piwik's MVC (Model-View-Controller) implementation is the first bit of code that is executed when Piwik handles an HTTP request. - Every request that is sent to Piwik's reporting side (as opposed to Piwik's tracking side) is sent to the `index.php` file in Piwik's root directory. This file creates an instance of the [FrontController](/api-reference/Piwik/FrontController) and uses it to dispatch the current request. The FrontController looks for the `module` and `action` query parameters. If `action` is missing, it takes the default value `"index"`. 
Piwik then invokes the matching controller method: diff --git a/docs/mvc-in-piwik.md b/docs/mvc-in-piwik.md deleted file mode 100644 index 21ae870c5..000000000 --- a/docs/mvc-in-piwik.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -category: Develop -subGuides: - - http-request-handling - - mvc-models - - mvc-views - - mvc-controllers ---- -# MVC (Model-View-Controller) - -## About this guide - -**Read this guide if** - -* you'd like to know **how HTTP requests are handled by Piwik** -* you'd like to know **what a Controller or API in Piwik is** -* you'd like to know **how to show HTML generated by your plugin to Piwik users** -* you'd like to know **how to use Twig templates in Piwik** - -**Guide assumptions** - -This guide assumes that you: - -* can code in PHP, -* have a general understanding of extending Piwik (if not, read our [Getting Started](/guides/getting-started-part-1) guide), -* and have knowledge of the [Model-View-Controller pattern](http://en.wikipedia.org/wiki/Model%E2%80%93view%E2%80%93controller). diff --git a/docs/pages.md b/docs/pages.md index 3a8e804eb..4478edd45 100644 --- a/docs/pages.md +++ b/docs/pages.md @@ -7,7 +7,7 @@ A page can contain any corporate related content, key metrics, news, help pages, ## Creating a page -Creating a page means creating [a controller](/guides/mvc-controllers) and [a Twig template](/guides/mvc-views). +Creating a page means creating [a controller](/guides/controllers) and [a Twig template](/guides/views). You can use the console for this: @@ -87,5 +87,6 @@ So far you have created a page but you can still not access it. 
To add it to one You can also read the following guides: -- [Controllers](/guides/mvc-controllers.md) +- [Controllers](/guides/controllers.md) +- [Views](/guides/views.md) - [Security in Piwik](/guides/security-in-piwik.md) diff --git a/docs/piwiks-extensibility-points.md b/docs/piwiks-extensibility-points.md index e9eba4e1b..aa7c80ee8 100644 --- a/docs/piwiks-extensibility-points.md +++ b/docs/piwiks-extensibility-points.md @@ -172,7 +172,7 @@ Plugins can define certain special classes in order to extend Piwik. These class Plugins can define an **API** class (that extends [Piwik\Plugin\API](/api-reference/Piwik/Plugin/API)) to add more methods to the [Reporting API](/guides/piwiks-reporting-api). They can also define a **Controller** class to handle HTTP requests that are sent by Piwik's UI. -_Learn more about these classes in our [MVC in Piwik](/guides/mvc-in-piwik) guide._ +_Learn more about these classes in our [Controllers](/guides/controllers) or [APIs](/guides/apis) guides._ ### Archiver @@ -207,7 +207,7 @@ _Learn more about creating new report visualizations in our [Visualizing Report * To learn **about every event that Piwik posts** [read the event docs](/api-reference/events). * To learn **more about the Twig filters and functions Piwik defines** read the documentation for the [View](/api-reference/Piwik/View) class. -* To learn **about API and Controller classes** read our [MVC in Piwik](/guides/mvc-in-piwik) guide. +* To learn **about API and Controller classes** read our [Controllers](/guides/controllers) or [APIs](/guides/apis) guides. * To learn **about Archiver classes** read our [All About Analytics](/guides/all-about-analytics-data) guide. * To learn **about plugin settings** read our [Plugin Settings](/guides/plugin-settings) guide. 
diff --git a/docs/piwiks-reporting-api.md b/docs/piwiks-reporting-api.md index 3aef1b392..0997abd3d 100644 --- a/docs/piwiks-reporting-api.md +++ b/docs/piwiks-reporting-api.md @@ -175,5 +175,5 @@ This example uses the following API requests: ## Learn more -* To learn **how API classes are used internally** read our [Piwik APIs](/guides/mvc-models) guide. +* To learn **how API classes are used internally** read our [Piwik APIs](/guides/apis) guide. * To learn **about how to calculate a report** read our guide [All About Analytics Data](/guides/all-about-analytics-data). diff --git a/docs/mvc-views.md b/docs/views.md similarity index 98% rename from docs/mvc-views.md rename to docs/views.md index cdcf750f7..07a47a1bd 100644 --- a/docs/mvc-views.md +++ b/docs/views.md @@ -1,7 +1,5 @@ --- category: Develop -previous: mvc-models -next: mvc-controllers --- # Views diff --git a/docs/visualizing-report-data.md b/docs/visualizing-report-data.md index db44d4399..8625dc06a 100644 --- a/docs/visualizing-report-data.md +++ b/docs/visualizing-report-data.md @@ -34,7 +34,7 @@ This guide assumes that you: * can code in PHP, * have a general understanding of extending Piwik (if not, read our [Getting Started](/guides/getting-started-part-1) guide), -* and understand the purpose of [Piwik controllers](/guides/mvc-controllers) and [Piwik APIs](/guides/piwiks-reporting-api). +* and understand the purpose of [Piwik controllers](/guides/controllers) and [Piwik APIs](/guides/piwiks-reporting-api). ## Displaying Analytics Reports @@ -103,7 +103,7 @@ Properties in the [Config](/api-reference/Piwik/ViewDataTable/Config) object aff ### Displaying reports on a page -Once there exists a controller method for a report, displaying it on a page in Piwik is straightforward. 
Assuming you've [exposed a controller method as a menu item](/guides/mvc-controllers#using-controller-methods-in-the-piwik-ui), you can then reuse your report's controller method to include the report in the menu item page: +Once there exists a controller method for a report, displaying it on a page in Piwik is straightforward. Assuming you've [exposed a controller method as a menu item](/guides/controllers#using-controller-methods-in-the-piwik-ui), you can then reuse your report's controller method to include the report in the menu item page: // controller method exposed as a menu item public function index() @@ -367,5 +367,5 @@ To make sure your visualization can be themed, make sure any color value you use * To learn **how reports are stored and created**, read our [All About Analytics](/guides/all-about-analytics-data) guide. * To see a **full example of creating a new visualization**, see the source for the [Treemap Visualization](https://github.com/piwik/plugin-TreemapVisualization) plugin. -* To learn more about **Piwik Controllers and outputting HTML**, read about [Controllers in Piwik](/guides/mvc-controllers). +* To learn more about **Piwik Controllers and outputting HTML**, read about [Controllers in Piwik](/guides/controllers). * To learn more about **interacting with Piwik's client side JavaScript**, read our [Working with Piwik's UI](/guides/working-with-piwiks-ui) guide. 
From bc39bc1d939351e5581ab6d4de4450bcd9812733 Mon Sep 17 00:00:00 2001 From: Matthieu Napoli Date: Wed, 17 Dec 2014 16:32:09 +1300 Subject: [PATCH 2/3] #22 #39 Reviewed the Persistence guide --- docs/persistence-and-the-mysql-backend.md | 988 ++++++---------------- 1 file changed, 268 insertions(+), 720 deletions(-) diff --git a/docs/persistence-and-the-mysql-backend.md b/docs/persistence-and-the-mysql-backend.md index f34aec818..10b176d0d 100644 --- a/docs/persistence-and-the-mysql-backend.md +++ b/docs/persistence-and-the-mysql-backend.md @@ -1,26 +1,8 @@ --- category: Develop -title: Persistence & the MySQL Backend --- # Persistence & the MySQL Backend - - ## About this guide **Read this guide if** @@ -29,31 +11,29 @@ What's missing? (stuff in my list that was not in when I wrote the 1st draft) * you'd like to know **what information is stored when Piwik stores analytics data, log data and miscellaneous data** * you'd like to know **how Piwik uses MySQL to persist data** -**Guide assumptions** +## What is persisted -This guide assumes that you: - -* can write PHP and SQL code -* and understand how relational databases work in general and MySQL in particular. - -## What gets persisted - -Piwik persists two main types of data: log data and archive data. **Log data** is everything that Piwik tracks and **archive data** is analytics data that is cached. +Piwik persists two main types of data: log data and archive data. **Log data** is everything that Piwik tracks and **archive data** is processed analytics data that is cached. Piwik also persists other simpler forms of data including: -* websites, -* users, -* goals, -* and options. - -This guide describes exactly what information this data consists of and exactly how the MySQL backend persists it. +- websites +- users +- goals +- options _Note: Piwik uses PHP arrays to hold data that will be persisted. 
When we describe what information is in each persisted entity, we list properties by the string name used to store the property in the entity array._ -## Log Data Persistence +### Log data -There are four types of log data, **visits**, **action types**, **conversions** and **ecommerce items**. All log data is persisted in a similar way: new data is constantly added to the set at high volume and updates are non-existant (except for **visits**). +There are four types of log data: + +- **visits** +- **action types** +- **conversions** +- **ecommerce items** + +All log data is persisted in a similar way: new data is constantly added to the set at high volume and updates are non-existent, except for **visits**. **Visit** data is updated while visits are active. So until a visit ends it is possible that Piwik will try and update it. @@ -62,223 +42,270 @@ Log data is read when calculating analytics data and old data will sometimes be Backends must ensure that inserting new log data is as fast as possible and aggregating log data is not too slow (though obviously, faster is better). -### Visits +#### Visits + +Visits are stored in the `log_visit` table. 
Each visit contains the following information: -* `'idsite'`: the ID of the the website it was tracked for -* `'idvisitor'`: a visitor ID (an 8 byte binary string) -* `'visitor_localtime'`: the visit datetime in the visitor's time of day -* `'visitor_returning'`: whether the visit is the first visit for this visitor or not -* `'visitor_count_visits'`: the number of visits the visitor has made up to this one -* `'visitor_days_since_last'`: the number of days since this visitor's last visit (if any) -* `'visitor_days_since_order'`: the number of days since this visitor's last order (if any) -* `'visitor_days_since_first'`: the number of days since this visitors' first visit -* `'visit_first_action_time'`: the datetime of the visit's first action -* `'visit_last_action_time'`: the datetime of the visit's last action -* `'visit_exit_idaction_url'`: the ID of the URL action type of the visit's last action -* `'visit_exit_idaction_name'`: the ID of the page title action type of the visit's last action -* `'visit_entry_idaction_url'`: the ID of the URL action type of the visit's first action -* `'visit_entry_idaction_name'`: the ID of the page title action type of this visit's first action -* `'visit_total_actions'`: the count of actions performed during this visit -* `'visit_total_searches'`: the count of site searches performed during this visit -* `'visit_total_events'`: the count of custom events performed during this visit -* `'visit_total_time'`: the total elapsed time of the visit -* `'visit_goal_converted'`: whether this visit converted a goal or not -* `'visit_goal_buyer'`: whether the visitor ordered something during this visit or not -* `'referer_type'`: the type of this visitor's referrer. Can be one of the following values: - * **Common::REFERRER\_TYPE\_DIRECT\_ENTRY**: If set to this value, other `'referer_...'` fields have no meaning. 
- * **Common::REFERRER\_TYPE\_SEARCH\_ENGINE**: If set to this value, `'referer_url'` is the url of the search engine and `'referer_keyword'` is the keyword used (if we can find it). +- `idsite`: the ID of the website it was tracked for +- `idvisitor`: a visitor ID (an 8 byte binary string) +- `visitor_localtime`: the visit datetime in the visitor's time of day +- `visitor_returning`: whether the visit is the first visit for this visitor or not +- `visitor_count_visits`: the number of visits the visitor has made up to this one +- `visitor_days_since_last`: the number of days since this visitor's last visit (if any) +- `visitor_days_since_order`: the number of days since this visitor's last order (if any) +- `visitor_days_since_first`: the number of days since this visitor's first visit +- `visit_first_action_time`: the datetime of the visit's first action +- `visit_last_action_time`: the datetime of the visit's last action +- `visit_exit_idaction_url`: the ID of the URL action type of the visit's last action +- `visit_exit_idaction_name`: the ID of the page title action type of the visit's last action +- `visit_entry_idaction_url`: the ID of the URL action type of the visit's first action +- `visit_entry_idaction_name`: the ID of the page title action type of this visit's first action +- `visit_total_actions`: the count of actions performed during this visit +- `visit_total_searches`: the count of site searches performed during this visit +- `visit_total_events`: the count of custom events performed during this visit +- `visit_total_time`: the total elapsed time of the visit +- `visit_goal_converted`: whether this visit converted a goal or not +- `visit_goal_buyer`: whether the visitor ordered something during this visit or not +- `referer_type`: the type of this visitor's referrer. Can be one of the following values: + - **Common::REFERRER\_TYPE\_DIRECT\_ENTRY**: If set to this value, other `referer_...` fields have no meaning. 
+ - **Common::REFERRER\_TYPE\_SEARCH\_ENGINE**: If set to this value, `referer_url` is the url of the search engine and `referer_keyword` is the keyword used (if we can find it). - * **Common::REFERRER\_TYPE\_WEBSITE**: If set to this value, `'referer_url'` is the url of the website. - * **Common::REFERRER\_TYPE\_CAMPAIGN**: If set to this value, `'referer_name'` is the name of the campaign. + - **Common::REFERRER\_TYPE\_WEBSITE**: If set to this value, `referer_url` is the url of the website. + - **Common::REFERRER\_TYPE\_CAMPAIGN**: If set to this value, `referer_name` is the name of the campaign. <-- TODO: double check campaign info --> -* `'referer_name'`: referrer name; its meaning depends on the specific referrer type -* `'referer_url'`: the referrer URL; its meaning depends on the specific referrer type -* `'referer_keyword'`: the keyword used if a search engine was the referrer -* `'config_id'`: a hash of all the visit's configuration options, including the OS, browser name, browser version, browser language, IP address and all browser plugin information -* `'config_os'`: a short string identifiying the operating system used to make this visit. See [UserAgentParser](https://github.com/piwik/piwik/blob/master/libs/UserAgentParser/UserAgentParser.php) for more info -* `'config_browser_name'`: a short string identifying the browser used to make this visit. 
See [UserAgentParser](https://github.com/piwik/piwik/blob/master/libs/UserAgentParser/UserAgentParser.php) for more info -* `'config_browser_version'`: a string identifying the version of the browser used to make this visit -* `'config_resolution'`: a string identifying the screen resolution the visitor used to make this visit (eg, `'1024x768'`) -* `'config_pdf'`: whether the visitor's browser can view PDF files or not -* `'config_flash'`: whether the visitor's browser can view flash files or not -* `'config_java'`: whether the visitor's browser can run Java or not -* `'config_director'`: -* `'config_quicktime'`: whether the visitor's browser uses quicktime to play media files or not -* `'config_realplayer'`: whether the visitor's browser can play realplayer media files or not -* `'config_windowsmedia'`: whether the visitor's browser uses windows media player to play media files -* `'config_gears'`: -* `'config_silverlight'`: whether the visitor's browser can run silverlight programs or not -* `'config_cookie'`: whether the visitor's browser has cookies enabled or not -* `'location_ip'`: the IP address of the computer that the visit was made from. Can be [anonymized](http://piwik.org/docs/privacy/#step-1-automatically-anonymize-visitor-ips) -* `'location_browser_lang'`: a string describing the language used in the visitor's browser -* `'location_country'`: a two character string describing the country the visitor was located in while visiting the site. Set by the [UserCountry](https://github.com/piwik/piwik/tree/master/plugins/UserCountry) plugin. -* `'location_region'`: a two character string describing the region of the country the visitor was in. Set by the [UserCountry](https://github.com/piwik/piwik/tree/master/plugins/UserCountry) plugin. -* `'location_city'`: a string naming the city the visitor was in while visiting the site. Set by the [UserCountry](https://github.com/piwik/piwik/tree/master/plugins/UserCountry) plugin. 
-* `'location_latitude'`: the latitude of the visitor while he/she visited the site. Set by the [UserCountry](https://github.com/piwik/piwik/tree/master/plugins/UserCountry) plugin. -* `'location_longitude'`: the longitude of the visitor while he/she visited the site. Set by the [UserCountry](https://github.com/piwik/piwik/tree/master/plugins/UserCountry) plugin. -* `'custom_var_k1'`: the custom variable name of the visit in the first slot for visit custom variables. -* `'custom_var_v1'`: the custom variable value of the visit in the first slot for visit custom variables. -* `'custom_var_k2'`: the custom variable name of the visit in the second slot for visit custom variables. -* `'custom_var_v2'`: the custom variable value of the visit in the second slot for visit custom variables. -* `'custom_var_k3'`: the custom variable name of the visit in the third slot for visit custom variables. -* `'custom_var_v3'`: the custom variable value of the visit in the third slot for visit custom variables. -* `'custom_var_k4'`: the custom variable name of the visit in the fourth slot for visit custom variables. -* `'custom_var_v4'`: the custom variable value of the visit in the fourth slot for visit custom variables. -* `'custom_var_k5'`: the custom variable name of the visit in the fifth slot for visit custom variables. -* `'custom_var_v5'`: the custom variable value of the visit in the fifth slot for visit custom variables. +- `referer_name`: referrer name; its meaning depends on the specific referrer type +- `referer_url`: the referrer URL; its meaning depends on the specific referrer type +- `referer_keyword`: the keyword used if a search engine was the referrer +- `config_id`: a hash of all the visit's configuration options, including the OS, browser name, browser version, browser language, IP address and all browser plugin information +- `config_os`: a short string identifying the operating system used to make this visit. 
See [UserAgentParser](https://github.com/piwik/piwik/blob/master/libs/UserAgentParser/UserAgentParser.php) for more info +- `config_browser_name`: a short string identifying the browser used to make this visit. See [UserAgentParser](https://github.com/piwik/piwik/blob/master/libs/UserAgentParser/UserAgentParser.php) for more info +- `config_browser_version`: a string identifying the version of the browser used to make this visit +- `config_resolution`: a string identifying the screen resolution the visitor used to make this visit (eg, `'1024x768'`) +- `config_pdf`: whether the visitor's browser can view PDF files or not +- `config_flash`: whether the visitor's browser can view flash files or not +- `config_java`: whether the visitor's browser can run Java or not +- `config_director`: +- `config_quicktime`: whether the visitor's browser uses quicktime to play media files or not +- `config_realplayer`: whether the visitor's browser can play realplayer media files or not +- `config_windowsmedia`: whether the visitor's browser uses windows media player to play media files +- `config_gears`: +- `config_silverlight`: whether the visitor's browser can run silverlight programs or not +- `config_cookie`: whether the visitor's browser has cookies enabled or not +- `location_ip`: the IP address of the computer that the visit was made from. Can be [anonymized](http://piwik.org/docs/privacy/#step-1-automatically-anonymize-visitor-ips) +- `location_browser_lang`: a string describing the language used in the visitor's browser +- `location_country`: a two character string describing the country the visitor was located in while visiting the site. Set by the [UserCountry](https://github.com/piwik/piwik/tree/master/plugins/UserCountry) plugin. +- `location_region`: a two character string describing the region of the country the visitor was in. Set by the [UserCountry](https://github.com/piwik/piwik/tree/master/plugins/UserCountry) plugin. 
+- `location_city`: a string naming the city the visitor was in while visiting the site. Set by the [UserCountry](https://github.com/piwik/piwik/tree/master/plugins/UserCountry) plugin. +- `location_latitude`: the latitude of the visitor while he/she visited the site. Set by the [UserCountry](https://github.com/piwik/piwik/tree/master/plugins/UserCountry) plugin. +- `location_longitude`: the longitude of the visitor while he/she visited the site. Set by the [UserCountry](https://github.com/piwik/piwik/tree/master/plugins/UserCountry) plugin. +- `custom_var_k1`: the custom variable name of the visit in the first slot for visit custom variables. +- `custom_var_v1`: the custom variable value of the visit in the first slot for visit custom variables. +- `custom_var_k2`: the custom variable name of the visit in the second slot for visit custom variables. +- `custom_var_v2`: the custom variable value of the visit in the second slot for visit custom variables. +- `custom_var_k3`: the custom variable name of the visit in the third slot for visit custom variables. +- `custom_var_v3`: the custom variable value of the visit in the third slot for visit custom variables. +- `custom_var_k4`: the custom variable name of the visit in the fourth slot for visit custom variables. +- `custom_var_v4`: the custom variable value of the visit in the fourth slot for visit custom variables. +- `custom_var_k5`: the custom variable name of the visit in the fifth slot for visit custom variables. +- `custom_var_v5`: the custom variable value of the visit in the fifth slot for visit custom variables. Some plugins, such as the [Provider](https://github.com/piwik/piwik/tree/master/plugins/Provider) plugin, will add new information to visits. +##### Table details + +The `index_idsite_config_datetime` index is used when trying to recognize returning visitors. + +The `index_idsite_datetime` index is used when aggregating visits. 
Since log aggregation occurs only for individual day periods, this index helps Piwik find the visits for a website and period quickly. Without it, log aggregation would require a table scan through the entire `log_visit` table. + + + #### Visit Actions -Visits also contain a list of actions, one for each action the visitor makes during the visit. Visit actions contain the following information: - -* `'server_time'`: the datetime the action was tracked in the server's timezone -* `'idaction_url'`: the ID of the URL action type for this action -* `'idaction_url_ref'`: the ID of the URL action type for the previous action in the visit -* `'idaction_name'`: the ID of the page title action type for this action -* `'idaction_name_ref'`: the ID of the page title action type for the previous action in the visit -* `'time_spent_ref_action'`: the amount of time spent doing the previous action -* `'custom_var_k1'`: the custom variable name of the first slot for page custom variables -* `'custom_var_v1'`: the custom variable value of the first slot for page custom variables -* `'custom_var_k2'`: the custom variable name of the second slot for page custom variables -* `'custom_var_v2'`: the custom variable value of the second slot for page custom variables -* `'custom_var_k3'`: the custom variable name of the third slot for page custom variables -* `'custom_var_v3'`: the custom variable value of the third slot for page custom variables -* `'custom_var_k4'`: the custom variable name of the fourth slot for page custom variables -* `'custom_var_v4'`: the custom variable value of the fourth slot for page custom variables -* `'custom_var_k5'`: the custom variable name of the slot for page custom variables -* `'custom_var_v5'`: the custom variable value of the slot for page custom variables -* `'custom_float'`: an unspecified float field, usually used to hold the time it took the server to serve this action - - - +Visits also contain a list of actions, one for each action the visitor 
makes during the visit. Those are stored in the `log_link_visit_action` table. + +Visit actions contain the following information: + +- `server_time`: the datetime the action was tracked in the server's timezone +- `idaction_url`: the ID of the URL action type for this action +- `idaction_url_ref`: the ID of the URL action type for the previous action in the visit +- `idaction_name`: the ID of the page title action type for this action +- `idaction_name_ref`: the ID of the page title action type for the previous action in the visit +- `time_spent_ref_action`: the amount of time spent doing the previous action +- `custom_var_k1`: the custom variable name of the first slot for page custom variables +- `custom_var_v1`: the custom variable value of the first slot for page custom variables +- `custom_var_k2`: the custom variable name of the second slot for page custom variables +- `custom_var_v2`: the custom variable value of the second slot for page custom variables +- `custom_var_k3`: the custom variable name of the third slot for page custom variables +- `custom_var_v3`: the custom variable value of the third slot for page custom variables +- `custom_var_k4`: the custom variable name of the fourth slot for page custom variables +- `custom_var_v4`: the custom variable value of the fourth slot for page custom variables +- `custom_var_k5`: the custom variable name of the fifth slot for page custom variables +- `custom_var_v5`: the custom variable value of the fifth slot for page custom variables +- `custom_float`: an unspecified float field, usually used to hold the time it took the server to serve this action + +##### Table details + +The `idsite` and `idvisitor` columns are copied from the visit action's associated visit in order to avoid having to join the log_visit table in some cases. + +The `index_idvisit` index allows Piwik to quickly query the visit actions for a visit. + +The `index_idsite_servertime` index is used when aggregating visit actions. 
It allows quick access to the visit actions that were tracked for a specific website during a specific period and lets us avoid a table scan through the whole table. -### Action Types +#### Action Types Action types, such as a specific URL or page title, are analyzed as well as visits. Such analysis can lead to an understanding of, for example, which pages are more relevant to visitors than others. When Piwik encounters a new action type, a new action type entity is persisted. -Action types contain the following information: - -* `'name'`: a string describing the action type. Can be a URL, a page title, campaign name or anything else. The meaning is determined by the **type** field. -* `'hash'`: a hash value calculated using the name. -* `'type'`: the action type's category. Can be one of the following values: - * **Piwik\Tracker\Action::TYPE\_PAGE\_URL**: the action type is a URL to a page on the website being tracked. - * **Piwik\Tracker\Action::TYPE\_OUTLINK**: the action type is a URL is of a link on the website being tracked. A visitor clicked it. - * **Piwik\Tracker\Action::TYPE\_DOWNLOAD**: the action type is a URL of a file that was downloaded from the website being tracked. - * **Piwik\Tracker\Action::TYPE\_PAGE\_TITLE**: the action type is the page title of a page on the website being tracked. - * **Piwik\Tracker\Action::TYPE\_ECOMMERCE\_ITEM\_SKU**: the action type is the SKU of an ecommerce item that is sold on the site. - * **Piwik\Tracker\Action::TYPE\_ECOMMERCE\_ITEM\_NAME**: the action type is the name of an ecommerce item that is sold on the site. - * **Piwik\Tracker\Action::TYPE\_ECOMMERCE\_ITEM\_CATEGORY**: the action type is the name of an ecommerce item category that is used on the site. - * **Piwik\Tracker\Action::TYPE_SITE_SEARCH**: the action type is a site search action. -* `'url_prefix'`: if the name is a URL this refers to the prefix of the URL. 
The prefix is removed from actual URLs so the protocol and **www.** parts of a URL are ignored during analysis. Can be the following values: - * `0`: `'http://'` - * `1`: `'http://www.'` - * `2`: `'https://'` - * `3`: `'https://www.'` +Action types are persisted in the `log_action` table and contain the following information: + +- `name`: a string describing the action type. Can be a URL, a page title, campaign name or anything else. The meaning is determined by the `type` field. +- `hash`: a hash value calculated using the name. +- `type`: the action type's category. Can be one of the following values: + - **Piwik\Tracker\Action::TYPE\_PAGE\_URL**: the action type is a URL to a page on the website being tracked. + - **Piwik\Tracker\Action::TYPE\_OUTLINK**: the action type is a URL of a link on the website being tracked. A visitor clicked it. + - **Piwik\Tracker\Action::TYPE\_DOWNLOAD**: the action type is a URL of a file that was downloaded from the website being tracked. + - **Piwik\Tracker\Action::TYPE\_PAGE\_TITLE**: the action type is the page title of a page on the website being tracked. + - **Piwik\Tracker\Action::TYPE\_ECOMMERCE\_ITEM\_SKU**: the action type is the SKU of an ecommerce item that is sold on the site. + - **Piwik\Tracker\Action::TYPE\_ECOMMERCE\_ITEM\_NAME**: the action type is the name of an ecommerce item that is sold on the site. + - **Piwik\Tracker\Action::TYPE\_ECOMMERCE\_ITEM\_CATEGORY**: the action type is the name of an ecommerce item category that is used on the site. + - **Piwik\Tracker\Action::TYPE\_SITE\_SEARCH**: the action type is a site search action. +- `url_prefix`: if the name is a URL this refers to the prefix of the URL. The prefix is removed from actual URLs so the protocol and **www.** parts of a URL are ignored during analysis. 
Can be the following values: + - `0`: `'http://'` + - `1`: `'http://www.'` + - `2`: `'https://'` + - `3`: `'https://www.'` + +##### Table details + +The `index_type_hash` index is used during tracking to find existing action types. -### Conversions +#### Conversions When a visit action is tracked that matches a goal's conversion parameters, a conversion is created and persisted. A conversion is a tally that counts a desired action that one of your visitors took. Piwik will analyze these tallies in conjunction with the actions that caused them in order to help Piwik users understand how to make their visitors take more desired actions. -A conversion consists of the following information: - -* `'idvisit'`: the ID of the visit that caused this conversion -* `'idsite'`: the ID of the site this conversion is for -* `'idvisitor'`: the ID of the visitor that caused this conversion -* `'server_time'`: the datetime of the conversion in the server's timezone -* `'idaction_url'`: the ID of the URL action type of the visit action that caused this conversion -* `'idlink_va'`: the ID of the specific visit action that resulted in this conversion -* `'referer_visit_server_date'`: -* `'url'`: the URL that caused this conversion to be tracked -* `'idgoal'`: the ID of the goal this conversion is for -* `'idorder'`: if this conversion is for an ecommerce order or abandoned cart, this will be the order's ID -* `'items'`: if this conversion is for an ecommerce order or abandoned cart, this will be the number of items in the order/cart -* `'revenue'`: if this conversion is for an ecommerce order or abandoned cart, this is the total revenue generated by the order -* `'revenue_subtotal'`: if this conversion is for an ecommerce order or abandoned cart, this is the total cost of the items in the order/cart -* `'revenue_tax'`: if this conversion is for an ecommerce order or abandoned cart, this is the total tax applied to the items in the order/cart -* `'revenue_shipping'`: if this conversion 
is for an ecommerce order or abandoned cart, this is the total cost of shipping -* `'revenue_discount'`: if this conversion is for an ecommerce order or abandoned cart, this is the total discount applied to the order +Conversions are stored in the `log_conversion` table and consist of the following information: + +- `idvisit`: the ID of the visit that caused this conversion +- `idsite`: the ID of the site this conversion is for +- `idvisitor`: the ID of the visitor that caused this conversion +- `server_time`: the datetime of the conversion in the server's timezone +- `idaction_url`: the ID of the URL action type of the visit action that caused this conversion +- `idlink_va`: the ID of the specific visit action that resulted in this conversion +- `referer_visit_server_date`: +- `url`: the URL that caused this conversion to be tracked +- `idgoal`: the ID of the goal this conversion is for +- `idorder`: if this conversion is for an ecommerce order or abandoned cart, this will be the order's ID +- `items`: if this conversion is for an ecommerce order or abandoned cart, this will be the number of items in the order/cart +- `revenue`: if this conversion is for an ecommerce order or abandoned cart, this is the total revenue generated by the order +- `revenue_subtotal`: if this conversion is for an ecommerce order or abandoned cart, this is the total cost of the items in the order/cart +- `revenue_tax`: if this conversion is for an ecommerce order or abandoned cart, this is the total tax applied to the items in the order/cart +- `revenue_shipping`: if this conversion is for an ecommerce order or abandoned cart, this is the total cost of shipping +- `revenue_discount`: if this conversion is for an ecommerce order or abandoned cart, this is the total discount applied to the order + +##### Table details + +All extra information stored in the table that is not listed above is replicated from the Visit entity this conversion is for. 
This allows us to avoid joining the `log_visit` table in certain cases. + +The `index_idsite_datetime` index is used when aggregating conversions. It allows quick access to the conversions that were tracked for a specific website during a specific period and lets us avoid a table scan through the entire table. -### Ecommerce items (aka, conversion items) +#### Ecommerce items (aka conversion items) -An ecommerce item is an item that was sold in an ecommerce order or abandoned in an abandoned cart. They consist of the following information: +An ecommerce item is an item that was sold in an ecommerce order or abandoned in an abandoned cart. -* `'server_time'`: -* `'idorder'`: the ID of the order that this ecommerce item is a part of -* `'idaction_sku'`: the ID of the action type entity that contains the item's SKU -* `'idaction_name'`: the ID of the action type entity that contains the ecommerce item's name -* `'idaction_category'`: the ID of an action type entity that contains a category for this ecommerce item -* `'idaction_category2'`: the ID of an action type entity that contains a category for this ecommerce item -* `'idaction_category3'`: the ID of an action type entity that contains a category for this ecommerce item -* `'idaction_category4'`: the ID of an action type entity that contains a category for this ecommerce item -* `'idaction_category5'`: the ID of an action type entity that contains a category for this ecommerce item -* `'price'`: the price of this individual ecommerce item -* `'quantity'`: the amount of this item that were present in the associated ecommerce order -* `'deleted'`: whether this item was removed from the order or not +Ecommerce items are stored in the `log_conversion_item` table and consist of the following information: - +- `server_time`: +- `idorder`: the ID of the order that this ecommerce item is a part of +- `idaction_sku`: the ID of the action type entity that contains the item's SKU +- `idaction_name`: the ID of the action 
type entity that contains the ecommerce item's name +- `idaction_category`: the ID of an action type entity that contains a category for this ecommerce item +- `idaction_category2`: the ID of an action type entity that contains a category for this ecommerce item +- `idaction_category3`: the ID of an action type entity that contains a category for this ecommerce item +- `idaction_category4`: the ID of an action type entity that contains a category for this ecommerce item +- `idaction_category5`: the ID of an action type entity that contains a category for this ecommerce item +- `price`: the price of this individual ecommerce item +- `quantity`: the amount of this item that were present in the associated ecommerce order +- `deleted`: whether this item was removed from the order or not -## Archive Data Persistence +##### Table details -Archive data consists of **metrics** and **reports**. Metrics are numeric values and are stored as such. Reports are stored in [DataTable](/api-reference/Piwik/DataTable) instances and persisted as compressed binary strings. +The `idsite`, `idvisitor`, `server_time` and `idvisit` columns are copied from the Conversion entity this Ecommerce Item belongs to. They are copied so we can aggregate Ecommerce Items without having to join other tables. + +The `index_idsite_servertime` index is used when aggregating ecommerce items. It allows quick access to the items that were tracked for a specific website and during a specific period and lets us avoid a table scan through the entire table. + +### Archive data -Archive data is associated with the website ID, period and segment it is for along with the data's identifying name. All archive data will be queried many times by this information. Currently, the segment is hashed and attached to the end of the metric name. +Archive data consists of **metrics** and **reports**. Metrics are numeric values and are stored as such. 
Reports are stored in [DataTable](/api-reference/Piwik/DataTable) instances and persisted as compressed binary strings. -Archive data is also persisted with the current date & time so it is possible to know how old some data is. +Archive data is associated with the website ID, period and segment it is for along with the data's identifying name. All archive data will be queried many times by this information. Currently, the segment is hashed and attached to the end of the metric name. Archive data is also persisted with the current date and time so it is possible to know how old some data is. All archive data will contain the following information: -* `'idarchive'`: An ID that is shared with all pieces of archive data that were archived with the same website ID, period and segment. -* `'name'`: The name of the report or metric. If a segment is used, a hash of the segment is appended to the name. -* `'idsite'`: The ID of the website this archive data is for. -* `'date1'`: The first date in the period this archive data is for. -* `'date2'`: The last date in the period this archive data is for. -* `'period'`: The type of period this archive data is for. Can be one of the following values: +- `idarchive`: An ID that is shared with all pieces of archive data that were archived with the same website ID, period and segment. +- `name`: The name of the report or metric. If a segment is used, a hash of the segment is appended to the name. +- `idsite`: The ID of the website this archive data is for. +- `date1`: The first date in the period this archive data is for. +- `date2`: The last date in the period this archive data is for. +- `period`: The type of period this archive data is for. Can be one of the following values: * `1`: for **day** periods. * `2`: for **week** periods. * `3`: for **month** periods. * `4`: for **year** periods. * `5`: for **range** periods. -* `'ts_archived'`: The datetime the archive data was cached. 
-* `'value'`: Either a numeric value (for a metric) or a binary string (for a report). +- `ts_archived`: The datetime the archive data was cached. +- `value`: Either a numeric value (for a metric) or a binary string (for a report). + +#### Table details + +Archive data is stored in tables partitioned by months, and missing tables are created automatically. Reports that aggregate visits from January 2012 will be held in a different table from reports that aggregate visits from February 2012. + +Piwik creates two types of archive tables, one for each type of archive data. The `archive_numeric` tables store metric data and the `archive_blob` tables store report data. Tables are suffixed with the year and the month: for example the `archive_numeric` table for January 2012 would be named `archive_numeric_2012_01`. -## Other Data Persistence +In `archive_numeric` tables: + +- the `index_idsite_dates_period` index is used when querying archive data. It lets Piwik quickly query archive data for any site and period, and for data that was archived past a certain date-time. +- the `index_period_archived` index is used when [purging archive data](http://piwik.org/docs/managing-your-databases-size/). It allows Piwik to quickly find archive data for a specific period that is old enough to be purged. + +In `archive_blob` tables: + +- the `index_period_archived` index is used in the same way as the one in `archive_numeric` tables +- `archive_blob` tables do not have an index that makes it fast to query for rows by site, period and archived date. This is because they should not be queried this way. Instead, the `archive_numeric` table should be queried and the `idarchive` values saved. These values can be used to query data in the `archive_blob` table. + +### Other data -### Websites (aka sites) +#### Websites (aka sites) **Site** entities contain information regarding a website whose visits are tracked. 
There won't be nearly as many of these as there are visits and archive data entries, but they will be queried often. Every reporting request (either through the [Reporting API](/guides/piwiks-reporting-api) or through Piwik's UI) will query one or more site entities. The tracker will only query site data if the [tracker cache](/guides/all-about-tracking#the-tracker-cache) needs to be updated. For most tracking requests, site data will not be queried (thus resulting in greater performance for the tracker). -Site entities contain the following information: - -* `'idsite'`: the unique ID of the website. -* `'name'`: the name of the website. -* `'main_url'`: the main URL visitors should use to access the website. -* `'ts_created'`: the date & time the site entity was persisted. -* `'ecommerce'`: `1` if the site is an ecommerce site, `0` if not. -* `'sitesearch'`: `1` if the site contains an internal search feature, `0` if not. -* `'sitesearch_keyword_parameters'`: the query parameters the site uses to hold internal site search keywords. This is a comma separated list. -* `'sitesearch_category_parameters'`: the query parameters the site uses to hold internal site search categories. This is a comma separated list. -* `'timezone'`: the timezone of the website. -* `'currency'`: the currency the website uses. Only valid if the site is an ecommerce site. -* `'excluded_ips'`: a comma separated list of IP addresses or IP address ranges. Visits that come from one of these IP addresses will not be tracked for this website. -* `'excluded_parameters'`: a comma separated list of query parameter names. These query parameters will be removed from page URLs before visits and actions are tracked. -* `'excluded_user_agents'`: a comma separated list of strings. Visits with a user agent that contains one of these strings will not be tracked for this website. 
-* `'group'`: -* `'keep_url_fragment'`: `1` if the URL fragment (everything after the `#`) should be kept in the URL when tracking actions, `0` if not. - -Site entities also contain a list of extra URLs that can be used to access the website. These are not stored within site entities themselves. +Site entities are stored in the `site` table and contain the following information: + +- `idsite`: the unique ID of the website. +- `name`: the name of the website. +- `main_url`: the main URL visitors should use to access the website. +- `ts_created`: the date & time the site entity was persisted. +- `ecommerce`: `1` if the site is an ecommerce site, `0` if not. +- `sitesearch`: `1` if the site contains an internal search feature, `0` if not. +- `sitesearch_keyword_parameters`: the query parameters the site uses to hold internal site search keywords. This is a comma separated list. +- `sitesearch_category_parameters`: the query parameters the site uses to hold internal site search categories. This is a comma separated list. +- `timezone`: the timezone of the website. +- `currency`: the currency the website uses. Only valid if the site is an ecommerce site. +- `excluded_ips`: a comma separated list of IP addresses or IP address ranges. Visits that come from one of these IP addresses will not be tracked for this website. +- `excluded_parameters`: a comma separated list of query parameter names. These query parameters will be removed from page URLs before visits and actions are tracked. +- `excluded_user_agents`: a comma separated list of strings. Visits with a user agent that contains one of these strings will not be tracked for this website. +- `group`: +- `keep_url_fragment`: `1` if the URL fragment (everything after the `#`) should be kept in the URL when tracking actions, `0` if not. + +Site entities also contain a list of extra URLs that can be used to access the website. These are not stored within site entities themselves: they are stored in the `site_url` table. 
Site entity data access occurs primarily through the [Piwik\Site](/api-reference/Piwik/Site) class. Anything that cannot be queried through that class can be queried through the [SitesManager](https://github.com/piwik/piwik/tree/master/plugins/SitesManager) core plugin. @@ -287,79 +314,67 @@ Site entity data access occurs primarily through the [Piwik\Site](/api-reference Each site has an optional list of goals. A goal is a desired action that a website visitor should take. -The following information is stored in a goal entity: - -* `'idsite'`: The ID of the website this goal belongs to. -* `'idgoal'`: The ID for this goal (unique only among goals for this website). -* `'name'`: The name of this goal. -* `'match_attribute'`: string describing what part of the request should be matched against when converting a goal. Can be one of the following values: - * `'manually'`: the goal is converted by [manual conversion requests](/api-reference/tracking-javascript#manually-trigger-a-conversion-for-a-goal). - * `'url'`: the goal is converted based on what the action URL contains. - * `'title'`: the goal is converted based on what the action page title contains. - * `'file'`: the goal is converted based on what the filename of a downloaded file contains. - * `'external_website'`: the goal is converted based on what the URL of an outlink contains. -* `'pattern'`: the pattern to use when checking if a goal is converted. -* `'pattern_type'`: the type of pattern matching to use when checking if a goal is converted. - * `'contains'`: the goal is converted if the match attribute contains the pattern. - * `'exact'`: the goal is converted if the match attribute equals the pattern exactly. - * `'regex'`: the goal is converted if the match attribute is a regex match with the pattern. -* `'case_sensitive'`: `1` if the matching should be case sensitive, `0` if otherwise. -* `'allow_multiple'`: `1` if multiple conversions are allowed per visit, `0` if otherwise. 
-* `'revenue'`: the amount of revenue a conversion generates (if any). -* `'deleted'`: `1` if this goal was deleted by a Piwik user, `0` if not. +Goals are stored in the `goal` table and contain the following information: + +- `idsite`: The ID of the website this goal belongs to. +- `idgoal`: The ID for this goal (unique only among goals for this website). +- `name`: The name of this goal. +- `match_attribute`: string describing what part of the request should be matched against when converting a goal. Can be one of the following values: + - `manually`: the goal is converted by [manual conversion requests](/api-reference/tracking-javascript#manually-trigger-a-conversion-for-a-goal). + - `url`: the goal is converted based on what the action URL contains. + - `title`: the goal is converted based on what the action page title contains. + - `file`: the goal is converted based on what the filename of a downloaded file contains. + - `external_website`: the goal is converted based on what the URL of an outlink contains. +- `pattern`: the pattern to use when checking if a goal is converted. +- `pattern_type`: the type of pattern matching to use when checking if a goal is converted. + - `contains`: the goal is converted if the match attribute contains the pattern. + - `exact`: the goal is converted if the match attribute equals the pattern exactly. + - `regex`: the goal is converted if the match attribute is a regex match with the pattern. +- `case_sensitive`: `1` if the matching should be case sensitive, `0` if otherwise. +- `allow_multiple`: `1` if multiple conversions are allowed per visit, `0` if otherwise. +- `revenue`: the amount of revenue a conversion generates (if any). +- `deleted`: `1` if this goal was deleted by a Piwik user, `0` if not. _Note: The ecommerce and abandoned cart goals are two special goals with special IDs. Ecommerce websites automatically have these goals._ -### Users +#### Users + +User entities describe each Piwik user except the Super User. 
They are persisted in the `users` table. -User entities describe each Piwik user except the Super User. The following information is stored in a user entity: +The following information is stored in a user entity: -* `'login'`: the user's login handle. -* `'password'`: a hash of the user's password. -* `'alias'`: the user's alias if any. This value is displayed instead of the login handle when addressing the user in the UI. -* `'email'`: the user's email address. -* `'token_auth'`: a user's token auth. -* `'date_registered'`: the date the user data was persisted. +- `login`: the user's login handle. +- `password`: a hash of the user's password. +- `alias`: the user's alias if any. This value is displayed instead of the login handle when addressing the user in the UI. +- `email`: the user's email address. +- `token_auth`: a user's token auth. +- `date_registered`: the date the user data was persisted. -User data is read on every UI and [Reporting API](/guides/piwiks-reporting-api) request. <~-- TODO: the tracker uses a token_auth, does that mean it reads user data? or is that data cached? or is it just for the superuser so the config is used? --> +User data is read on every UI and [Reporting API](/guides/piwiks-reporting-api) request. There is some user related information that is not stored directly in user entities. They are descirbed below: -#### User access +#### User permissions -Users can be allowed and disallowed access to websites. Piwik persists each user's access level for each website they have access to. If they don't have access to a website, then no information regarding that user + website combination will be persisted. +Users can be allowed and disallowed access to websites. Piwik persists each user's access level for each website they have access to in the `access` table. -An access level can be one of the following values: - -* `'view'`: The user has view access but cannot add goals or change any settings for the site. 
-* `'admin'`: The user can view analytics data for the site and add goals or change settings for the site. +To read more about this, read the [Permissions](/guides/permissions) guide. #### User language choice -Piwik will also persist each user's language of choice. User logins are associated with the name of the language (written in the chosen language as opposed to English). +Piwik will also persist each user's language of choice. User logins are associated with the name of the language (written in the chosen language as opposed to English). This association and the persistence logic are implemented by the [LanguagesManager](https://github.com/piwik/piwik/tree/master/plugins/LanguagesManager) plugin. -### [Options](/api-reference/Piwik/Option) +#### Options -Options are key-value pairs where the key is a string and the value is a another string (possibly bigger and possibly binary). They are queried on every UI and [Reporting API](/guides/piwiks-reporting-api) request. The tracker will [cache](/guides/all-about-tracking#the-tracker-cache) relevant option values and so will only query options when the cache needs updating. +[Options](/api-reference/Piwik/Option) are key-value pairs where the key is a string and the value is another string (possibly bigger and possibly binary). They are queried on every UI and [Reporting API](/guides/piwiks-reporting-api) request. The tracker will [cache](/guides/all-about-tracking#the-tracker-cache) relevant option values and so will only query options when the cache needs updating. Some options should be loaded on every non-tracking request. These options have a special **autoload** property set to `1`. -## The Database Logging Backend - -Piwik includes a [logging utility](/api-reference/Piwik/Log) that can be used to aid development or troubleshoot live Piwik installs. The utility can output log messages to multiple backends, including the database. 
- -Every log entry contains the following information (all of which is persisted): - -* `'tag'`: A string used to categorize the log entry. This will either be the name of the plugin that logged a message or, if it cannot be found, the name of the class. -* `'timestamp'`: When the log entry was made. -* `'level'`: The log level (as a string). Describes the severity of the entry. See [Piwik\Log](/api-reference/Piwik/Log) for a list of levels. -* `'message'`: The log entry's message. - ## Plugin Persistence Plugins can provide persistence for new data if they need to. At the moment, since MySQL is the only supported backend, this means directly adding and using new tables. @@ -451,473 +466,6 @@ Plugins should remove the column in the [uninstall](/api-reference/Piwik/Plugin# -## The MySQL Backend - -This section lists each MySQL table used to store the data described above and details everything we did to them to make Piwik run as fast as possible. - -_Note: All table names in the MySQL database are prefixed with the value in the `[database] tables_prefix` INI config._ This is to ensure that non-piwik tables do not get overwritten or used by accident. It also makes it possible to store multiple instances of Piwik in one datbase. - -### Log Data Tables - -#### log_visit - -This table stores [Visit entities](#log-data-persistence-visits). 
- -The `CREATE TABLE` SQL for this table is: - -```sql -CREATE TABLE log_visit ( - idvisit INTEGER(10) UNSIGNED NOT NULL AUTO_INCREMENT, - idsite INTEGER(10) UNSIGNED NOT NULL, - idvisitor BINARY(8) NOT NULL, - visitor_localtime TIME NOT NULL, - visitor_returning TINYINT(1) NOT NULL, - visitor_count_visits SMALLINT(5) UNSIGNED NOT NULL, - visitor_days_since_last SMALLINT(5) UNSIGNED NOT NULL, - visitor_days_since_order SMALLINT(5) UNSIGNED NOT NULL, - visitor_days_since_first SMALLINT(5) UNSIGNED NOT NULL, - visit_first_action_time DATETIME NOT NULL, - visit_last_action_time DATETIME NOT NULL, - visit_exit_idaction_url INTEGER(11) UNSIGNED NULL DEFAULT 0, - visit_exit_idaction_name INTEGER(11) UNSIGNED NOT NULL, - visit_entry_idaction_url INTEGER(11) UNSIGNED NOT NULL, - visit_entry_idaction_name INTEGER(11) UNSIGNED NOT NULL, - visit_total_actions SMALLINT(5) UNSIGNED NOT NULL, - visit_total_searches SMALLINT(5) UNSIGNED NOT NULL, - visit_total_events SMALLINT(5) UNSIGNED NOT NULL, - visit_total_time SMALLINT(5) UNSIGNED NOT NULL, - visit_goal_converted TINYINT(1) NOT NULL, - visit_goal_buyer TINYINT(1) NOT NULL, - referer_type TINYINT(1) UNSIGNED NULL, - referer_name VARCHAR(70) NULL, - referer_url TEXT NOT NULL, - referer_keyword VARCHAR(255) NULL, - config_id BINARY(8) NOT NULL, - config_os CHAR(3) NOT NULL, - config_browser_name VARCHAR(10) NOT NULL, - config_browser_version VARCHAR(20) NOT NULL, - config_resolution VARCHAR(9) NOT NULL, - config_pdf TINYINT(1) NOT NULL, - config_flash TINYINT(1) NOT NULL, - config_java TINYINT(1) NOT NULL, - config_director TINYINT(1) NOT NULL, - config_quicktime TINYINT(1) NOT NULL, - config_realplayer TINYINT(1) NOT NULL, - config_windowsmedia TINYINT(1) NOT NULL, - config_gears TINYINT(1) NOT NULL, - config_silverlight TINYINT(1) NOT NULL, - config_cookie TINYINT(1) NOT NULL, - location_ip VARBINARY(16) NOT NULL, - location_browser_lang VARCHAR(20) NOT NULL, - location_country CHAR(3) NOT NULL, - location_region char(2) 
DEFAULT NULL, - location_city varchar(255) DEFAULT NULL, - location_latitude float(10, 6) DEFAULT NULL, - location_longitude float(10, 6) DEFAULT NULL, - custom_var_k1 VARCHAR(200) DEFAULT NULL, - custom_var_v1 VARCHAR(200) DEFAULT NULL, - custom_var_k2 VARCHAR(200) DEFAULT NULL, - custom_var_v2 VARCHAR(200) DEFAULT NULL, - custom_var_k3 VARCHAR(200) DEFAULT NULL, - custom_var_v3 VARCHAR(200) DEFAULT NULL, - custom_var_k4 VARCHAR(200) DEFAULT NULL, - custom_var_v4 VARCHAR(200) DEFAULT NULL, - custom_var_k5 VARCHAR(200) DEFAULT NULL, - custom_var_v5 VARCHAR(200) DEFAULT NULL, - PRIMARY KEY(idvisit), - INDEX index_idsite_config_datetime (idsite, config_id, visit_last_action_time), - INDEX index_idsite_datetime (idsite, visit_last_action_time), - INDEX index_idsite_idvisitor (idsite, idvisitor) -) DEFAULT CHARSET=utf8; -``` - -The **index\_idsite\_config_datetime** index is used when trying to recognize returning visitors. - -The **index\_idsite\_datetime** index is used when aggregating visits. Since log aggregation occurs only for individual day periods, this index helps Piwik find the visits for a website and period quickly. Without it, log aggregation would require a table scan through the entire log_visit table. - - - -#### log\_link\_visit\_action - -This table stores [Visit Action entities](#log-data-persistence-visit-actions). 
- -The `CREATE TABLE` SQL for this table is: - -```sql -CREATE TABLE log_link_visit_action ( - idlink_va INTEGER(11) UNSIGNED NOT NULL AUTO_INCREMENT, - idsite int(10) UNSIGNED NOT NULL, - idvisitor BINARY(8) NOT NULL, - server_time DATETIME NOT NULL, - idvisit INTEGER(10) UNSIGNED NOT NULL, - idaction_url INTEGER(10) UNSIGNED DEFAULT NULL, - idaction_url_ref INTEGER(10) UNSIGNED NULL DEFAULT 0, - idaction_name INTEGER(10) UNSIGNED, - idaction_name_ref INTEGER(10) UNSIGNED NOT NULL, - idaction_event_category INTEGER(10) UNSIGNED DEFAULT NULL, - idaction_event_action INTEGER(10) UNSIGNED DEFAULT NULL, - time_spent_ref_action INTEGER(10) UNSIGNED NOT NULL, - custom_var_k1 VARCHAR(200) DEFAULT NULL, - custom_var_v1 VARCHAR(200) DEFAULT NULL, - custom_var_k2 VARCHAR(200) DEFAULT NULL, - custom_var_v2 VARCHAR(200) DEFAULT NULL, - custom_var_k3 VARCHAR(200) DEFAULT NULL, - custom_var_v3 VARCHAR(200) DEFAULT NULL, - custom_var_k4 VARCHAR(200) DEFAULT NULL, - custom_var_v4 VARCHAR(200) DEFAULT NULL, - custom_var_k5 VARCHAR(200) DEFAULT NULL, - custom_var_v5 VARCHAR(200) DEFAULT NULL, - custom_float FLOAT NULL DEFAULT NULL, - PRIMARY KEY(idlink_va), - INDEX index_idvisit(idvisit), - INDEX index_idsite_servertime ( idsite, server_time ) -) DEFAULT CHARSET=utf8 -``` - -The `idsite` and `idvisitor` columns are copied from the visit action's associated visit in order to avoid having to join the log_visit table in some cases. - -The **index\_idvisit** index allows Piwik to quickly query the visit actions for a visit. - -The **index\_idsite\_servertime** index is used when aggregating visit actions. It allows quick access to the visit actions that were tracked for a specific website during a specific period and lets us avoid a table scan through the whole table. - -#### log_action - -This table stores [Action Type entities](#log-data-persistence-action-types). 
- -The `CREATE TABLE` SQL for this table is: - -```sql -CREATE TABLE log_action ( - idaction INTEGER(10) UNSIGNED NOT NULL AUTO_INCREMENT, - name TEXT, - hash INTEGER(10) UNSIGNED NOT NULL, - type TINYINT UNSIGNED NULL, - url_prefix TINYINT(2) NULL, - PRIMARY KEY(idaction), - INDEX index_type_hash (type, hash) -) DEFAULT CHARSET=utf8 -``` - -The **index\_type\_hash** index is used during tracking to find existing action types. - -#### log_conversion - -This table stores [Conversion entities](#log-data-persistence-conversions). - -The `CREATE TABLE` SQL for this table is: - -```sql -CREATE TABLE `log_conversion` ( - idvisit int(10) unsigned NOT NULL, - idsite int(10) unsigned NOT NULL, - idvisitor BINARY(8) NOT NULL, - server_time datetime NOT NULL, - idaction_url int(11) default NULL, - idlink_va int(11) default NULL, - referer_visit_server_date date default NULL, - referer_type int(10) unsigned default NULL, - referer_name varchar(70) default NULL, - referer_keyword varchar(255) default NULL, - visitor_returning tinyint(1) NOT NULL, - visitor_count_visits SMALLINT(5) UNSIGNED NOT NULL, - visitor_days_since_first SMALLINT(5) UNSIGNED NOT NULL, - visitor_days_since_order SMALLINT(5) UNSIGNED NOT NULL, - location_country char(3) NOT NULL, - location_region char(2) DEFAULT NULL, - location_city varchar(255) DEFAULT NULL, - location_latitude float(10, 6) DEFAULT NULL, - location_longitude float(10, 6) DEFAULT NULL, - url text NOT NULL, - idgoal int(10) NOT NULL, - buster int unsigned NOT NULL, - idorder varchar(100) default NULL, - items SMALLINT UNSIGNED DEFAULT NULL, - revenue float default NULL, - revenue_subtotal float default NULL, - revenue_tax float default NULL, - revenue_shipping float default NULL, - revenue_discount float default NULL, - custom_var_k1 VARCHAR(200) DEFAULT NULL, - custom_var_v1 VARCHAR(200) DEFAULT NULL, - custom_var_k2 VARCHAR(200) DEFAULT NULL, - custom_var_v2 VARCHAR(200) DEFAULT NULL, - custom_var_k3 VARCHAR(200) DEFAULT NULL, - 
custom_var_v3 VARCHAR(200) DEFAULT NULL, - custom_var_k4 VARCHAR(200) DEFAULT NULL, - custom_var_v4 VARCHAR(200) DEFAULT NULL, - custom_var_k5 VARCHAR(200) DEFAULT NULL, - custom_var_v5 VARCHAR(200) DEFAULT NULL, - PRIMARY KEY (idvisit, idgoal, buster), - UNIQUE KEY unique_idsite_idorder (idsite, idorder), - INDEX index_idsite_datetime ( idsite, server_time ) -) DEFAULT CHARSET=utf8 -``` - -All extra information stored in this table that is not listed for the conversion entity above is replicated from the Visit entity this conversion is for. This allows us to avoid joining the log_visit table in certain cases. - -The **index\_idsite\_datetime** index is used when aggregating conversions. It allows quick access to the conversions that were tracked for a specific website during a specific period and lets us avoid a table scan through the entire table. - -#### log\_conversion\_item - -This table stores [Ecommerce Item entities](#log-data-persistence-ecommerce-items). - -The `CREATE TABLE` SQL for this table is: - -```sql -CREATE TABLE `log_conversion_item` ( - idsite int(10) UNSIGNED NOT NULL, - idvisitor BINARY(8) NOT NULL, - server_time DATETIME NOT NULL, - idvisit INTEGER(10) UNSIGNED NOT NULL, - idorder varchar(100) NOT NULL, - - idaction_sku INTEGER(10) UNSIGNED NOT NULL, - idaction_name INTEGER(10) UNSIGNED NOT NULL, - idaction_category INTEGER(10) UNSIGNED NOT NULL, - idaction_category2 INTEGER(10) UNSIGNED NOT NULL, - idaction_category3 INTEGER(10) UNSIGNED NOT NULL, - idaction_category4 INTEGER(10) UNSIGNED NOT NULL, - idaction_category5 INTEGER(10) UNSIGNED NOT NULL, - price FLOAT NOT NULL, - quantity INTEGER(10) UNSIGNED NOT NULL, - deleted TINYINT(1) UNSIGNED NOT NULL, - - PRIMARY KEY(idvisit, idorder, idaction_sku), - INDEX index_idsite_servertime ( idsite, server_time ) -) DEFAULT CHARSET=utf8 -``` - -The `idsite`, `idvisitor`, `server_time` and `idvisit` columns are copied from the Conversion entity this Ecommerce Item belongs to. 
They are copied so we can aggregate Ecommerce Items without having to join other tables. - -The **index\_idsite\_servertime** index is used when aggregating ecommerce items. It allows quick access to the items that were tracked for a specific website and during a specific period and lets us avoid a table scan through the entire table. - -### Archive Tables - -In the MySQL backend archive data is partitioned by the month the archive data is for. So reports that aggregate visits from January, 2012 will be held in a different table from reports that aggregate visits from February 2012. - -Piwik creates two types of archive tables, one for each type of archive data. The **archive\_numeric** tables store metric data and the **archive\_blob** tables store report data. - -Archive tables are created dynamically. When Piwik needs to query or insert archive data for a certain month and it cannot find the table that holds this data, the table is created. - -The year and month of an archive table is appended as the suffix to the name. So the **archive\_numeric** table for January, 2012 would have the name **archive\_numeric\_2012\_01**. - -#### archive_numeric - -The `CREATE TABLE` SQL for this table is: - -```sql -CREATE TABLE archive_numeric_YYYY_MM ( - idarchive INTEGER UNSIGNED NOT NULL, - name VARCHAR(255) NOT NULL, - idsite INTEGER UNSIGNED NULL, - date1 DATE NULL, - date2 DATE NULL, - period TINYINT UNSIGNED NULL, - ts_archived DATETIME NULL, - value DOUBLE NULL, - PRIMARY KEY(idarchive, name), - INDEX index_idsite_dates_period(idsite, date1, date2, period, ts_archived), - INDEX index_period_archived(period, ts_archived) -) DEFAULT CHARSET=utf8 -``` - -The **index\_idsite\_dates\_period** index is used when querying archive data. It lets Piwik quickly query archive data for any site and period, and for data that was archived past a certain date-time. 
- -The **index\_period\_archived** index is used when [purging archive data](http://piwik.org/docs/managing-your-databases-size/). It allows Piwik to quickly find archive data for a specific period that is old enough to be purged. - -#### archive_blob - -The `CREATE TABLE` SQL for this table is: - -```sql -CREATE TABLE archive_blob ( - idarchive INTEGER UNSIGNED NOT NULL, - name VARCHAR(255) NOT NULL, - idsite INTEGER UNSIGNED NULL, - date1 DATE NULL, - date2 DATE NULL, - period TINYINT UNSIGNED NULL, - ts_archived DATETIME NULL, - value MEDIUMBLOB NULL, - PRIMARY KEY(idarchive, name), - INDEX index_period_archived(period, ts_archived) -) DEFAULT CHARSET=utf8 -``` - -The **index\_period\_archived** index is used in the same way as the one in **archive\_numeric** tables. - -**archive\_blob** tables do not have an index that makes it fast to query for rows by site, period and archived date. This is because they should not be queried this way. Instead, the **archive\_numeric** table should be queried and the `idarchive` values saved. These values can be used to query data in the **archive\_blob** table. - -### Other Tables - -#### site - -This table stores [Website](#other-data-site) entities. - -The `CREATE TABLE` SQL for this table is: - -```sql -CREATE TABLE site ( - idsite INTEGER(10) UNSIGNED NOT NULL AUTO_INCREMENT, - name VARCHAR(90) NOT NULL, - main_url VARCHAR(255) NOT NULL, - ts_created TIMESTAMP NULL, - ecommerce TINYINT DEFAULT 0, - sitesearch TINYINT DEFAULT 1, - sitesearch_keyword_parameters TEXT NOT NULL, - sitesearch_category_parameters TEXT NOT NULL, - timezone VARCHAR( 50 ) NOT NULL, - currency CHAR( 3 ) NOT NULL, - excluded_ips TEXT NOT NULL, - excluded_parameters TEXT NOT NULL, - excluded_user_agents TEXT NOT NULL, - `group` VARCHAR(250) NOT NULL, - keep_url_fragment TINYINT NOT NULL DEFAULT 0, - PRIMARY KEY(idsite) -) DEFAULT CHARSET=utf8 -``` - -##### site_url - -This table stores extra URLs for [Website](#other-data-site) entities. 
- -The `CREATE TABLE` SQL for this table is: - -```sql -CREATE TABLE site_url ( - idsite INTEGER(10) UNSIGNED NOT NULL, - url VARCHAR(255) NOT NULL, - PRIMARY KEY(idsite, url) -) DEFAULT CHARSET=utf8 -``` - -#### goal - -This table stores [Goal](#other-data-goal) entities. - -The `CREATE TABLE` SQL for this table is: - -```sql -CREATE TABLE `goal` ( - `idsite` int(11) NOT NULL, - `idgoal` int(11) NOT NULL, - `name` varchar(50) NOT NULL, - `match_attribute` varchar(20) NOT NULL, - `pattern` varchar(255) NOT NULL, - `pattern_type` varchar(10) NOT NULL, - `case_sensitive` tinyint(4) NOT NULL, - `allow_multiple` tinyint(4) NOT NULL, - `revenue` float NOT NULL, - `deleted` tinyint(4) NOT NULL default '0', - PRIMARY KEY (`idsite`,`idgoal`) -) DEFAULT CHARSET=utf8 -``` - -#### users - -This table stores [User](#other-data-user) entities. - -The `CREATE TABLE` SQL for this table is: - -```sql -CREATE TABLE user ( - login VARCHAR(100) NOT NULL, - password CHAR(32) NOT NULL, - alias VARCHAR(45) NOT NULL, - email VARCHAR(100) NOT NULL, - token_auth CHAR(32) NOT NULL, - date_registered TIMESTAMP NULL, - PRIMARY KEY(login), - UNIQUE KEY uniq_keytoken(token_auth) -) DEFAULT CHARSET=utf8 -``` - -##### access - -This table stores [User Access information](#other-data-user-access). - -The `CREATE TABLE` SQL for this table is: - -```sql -CREATE TABLE access ( - login VARCHAR(100) NOT NULL, - idsite INTEGER UNSIGNED NOT NULL, - access VARCHAR(10) NULL, - PRIMARY KEY(login, idsite) -) DEFAULT CHARSET=utf8 -``` - -##### user_language - -This table stores [User Language Choice information](#other-data-user-language-choice). - -The `CREATE TABLE` SQL for this table is: - -```sql -CREATE TABLE user_language ( - login VARCHAR( 100 ) NOT NULL , - language VARCHAR( 10 ) NOT NULL , - PRIMARY KEY ( login ) -) DEFAULT CHARSET=utf8 -``` - -This table is created by the [LanguagesManager](https://github.com/piwik/piwik/tree/master/plugins/LanguagesManager) plugin. 
- -#### option - -This table stores [Option](#other-data-options) data. - -The `CREATE TABLE` SQL for this table is: - -```sql -CREATE TABLE `option` ( - option_name VARCHAR( 255 ) NOT NULL, - option_value LONGTEXT NOT NULL, - autoload TINYINT NOT NULL DEFAULT '1', - PRIMARY KEY ( option_name ), - INDEX autoload( autoload ) -) DEFAULT CHARSET=utf8 -``` - -#### logger_message - -This table is used by the database logging backend. - -The `CREATE TABLE` SQL for this table is: - -```sql -CREATE TABLE logger_message ( - idlogger_message INTEGER UNSIGNED NOT NULL AUTO_INCREMENT, - tag VARCHAR(50) NULL, - timestamp TIMESTAMP NULL, - level VARCHAR(16) NULL, - message TEXT NULL, - PRIMARY KEY(idlogger_message) -) DEFAULT CHARSET=utf8 -``` - -#### session - -This table does not store entity data. It is used by Piwik to store session data (as an alternative to using file-based sessions). - -The `CREATE TABLE` SQL for this table is: - -```sql -CREATE TABLE session ( - id CHAR(32) NOT NULL, - modified INTEGER, - lifetime INTEGER, - data TEXT, - PRIMARY KEY ( id ) -) DEFAULT CHARSET=utf8 -``` - -## Other Backends - -Currently the MySQL backend is the only available backend. The use of MySQL is scattered throughout Piwik, so at the moment it is also not possible to create other backends. - -That being said, the use of other peristence solutions is on our TODO list. Piwik will eventually work with different relational databases and with different NoSQL solutions. - ## Learn more * To learn **how the tracker inserts log data** see our [All About Tracking](/guides/all-about-tracking) guide. 
From 48f3bfd952882046eea7441ed6dca6cfc4031c2a Mon Sep 17 00:00:00 2001 From: Matthieu Napoli Date: Wed, 17 Dec 2014 16:48:59 +1300 Subject: [PATCH 3/3] #22 #39 Renamed the Persistence guide into "Piwik Data" and extracted "Plugin Data" into a separate article --- .../Content/Category/DevelopCategory.php | 1 + docs/persistence-and-the-mysql-backend.md | 153 +++--------------- docs/plugin-data.md | 93 +++++++++++ 3 files changed, 120 insertions(+), 127 deletions(-) create mode 100644 docs/plugin-data.md diff --git a/app/helpers/Content/Category/DevelopCategory.php b/app/helpers/Content/Category/DevelopCategory.php index 5b6f85ab8..e44fc482a 100644 --- a/app/helpers/Content/Category/DevelopCategory.php +++ b/app/helpers/Content/Category/DevelopCategory.php @@ -51,6 +51,7 @@ public function getItems() new EmptySubCategory('Archiving and data', [ new Guide('all-about-analytics-data'), new Guide('persistence-and-the-mysql-backend'), + new Guide('plugin-data'), ]), new Guide('piwik-configuration'), new EmptySubCategory('Security', [ diff --git a/docs/persistence-and-the-mysql-backend.md b/docs/persistence-and-the-mysql-backend.md index 10b176d0d..16a86ea47 100644 --- a/docs/persistence-and-the-mysql-backend.md +++ b/docs/persistence-and-the-mysql-backend.md @@ -1,19 +1,12 @@ --- category: Develop --- -# Persistence & the MySQL Backend +# Piwik Data -## About this guide +Piwik persists two main types of data: -**Read this guide if** - -* you'd like to know **how your plugin can persist new non-analytics data** -* you'd like to know **what information is stored when Piwik stores analytics data, log data and miscellaneous data** -* you'd like to know **how Piwik uses MySQL to persist data** - -## What is persisted - -Piwik persists two main types of data: log data and archive data. **Log data** is everything that Piwik tracks and **archive data** is processed analytics data that is cached. 
+- **log data**: raw analytics data that Piwik receives in the tracker +- **archive data**: aggregated analytics data (constructed from log data) that is cached and used to build reports Piwik also persists other simpler forms of data including: @@ -22,9 +15,9 @@ Piwik also persists other simpler forms of data including: - goals - options -_Note: Piwik uses PHP arrays to hold data that will be persisted. When we describe what information is in each persisted entity, we list properties by the string name used to store the property in the entity array._ +*Note: Piwik uses PHP arrays to hold persisted data. In the descriptions below we list properties by the name used in the PHP array.* -### Log data +## Log data There are four types of log data: @@ -42,7 +35,7 @@ Log data is read when calculating analytics data and old data will sometimes be Backends must ensure that inserting new log data is as fast as possible and aggregating log data is not too slow (though obviously, faster is better). -#### Visits +### Visits Visits are stored in the `log_visit` table. @@ -71,10 +64,8 @@ Each visit contains the following information: - `referer_type`: the type of this visitor's referrer. Can be one of the following values: - **Common::REFERRER\_TYPE\_DIRECT\_ENTRY**: If set to this value, other `referer_...` fields have no meaning. - **Common::REFERRER\_TYPE\_SEARCH\_ENGINE**: If set to this value, `referer_url` is the url of the search engine and `referer_keyword` is the keyword used (if we can find it). - - **Common::REFERRER\_TYPE\_WEBSITE**: If set to this value, `referer_url` is the url of the website. - **Common::REFERRER\_TYPE\_CAMPAIGN**: If set to this value, `referer_name` is the name of the campaign. 
- <-- TODO: double check campaign info --> - `referer_name`: referrer name; its meaning depends on the specific referrer type - `referer_url`: the referrer URL; its meaning depends on the specific referrer type - `referer_keyword`: the keyword used if a search engine was the referrer @@ -113,16 +104,14 @@ Each visit contains the following information: Some plugins, such as the [Provider](https://github.com/piwik/piwik/tree/master/plugins/Provider) plugin, will add new information to visits. -##### Table details +#### Table details The `index_idsite_config_datetime` index is used when trying to recognize returning visitors. The `index_idsite_datetime` index is used when aggregating visits. Since log aggregation occurs only for individual day periods, this index helps Piwik find the visits for a website and period quickly. Without it, log aggregation would require a table scan through the entire `log_visit` table. - - -#### Visit Actions +### Visit Actions Visits also contain a list of actions, one for each action the visitor makes during the visit. Those are stored in the `log_link_visit_action` table. @@ -146,7 +135,7 @@ Visit actions contain the following information: - `custom_var_v5`: the custom variable value of the slot for page custom variables - `custom_float`: an unspecified float field, usually used to hold the time it took the server to serve this action -##### Table details +#### Table details The `idsite` and `idvisitor` columns are copied from the visit action's associated visit in order to avoid having to join the log_visit table in some cases. @@ -155,7 +144,7 @@ The `index_idvisit` index allows Piwik to quickly query the visit actions for a The `index_idsite_servertime` index is used when aggregating visit actions. It allows quick access to the visit actions that were tracked for a specific website during a specific period and lets us avoid a table scan through the whole table. 
-#### Action Types +### Action Types Action types, such as a specific URL or page title, are analyzed as well as visits. Such analysis can lead to an understanding of, for example, which pages are more relevant to visitors than others. @@ -180,12 +169,12 @@ Action types are persisted in the `log_action` table and contain the following i - `2`: `'https://'` - `3`: `'https://www.'` -##### Table details +#### Table details The `index_type_hash` index is used during tracking to find existing action types. -#### Conversions +### Conversions When a visit action is tracked that matches a goal's conversion parameters, a conversion is created and persisted. A conversion is a tally that counts a desired action that one of your visitors took. Piwik will analyze these tallies in conjunction with the actions that caused them in order to help Piwik users understand how to make their visitors take more desired actions. @@ -208,14 +197,14 @@ Conversions are stored in the `log_conversion` table and consist of the followin - `revenue_shipping`: if this conversion is for an ecommerce order or abandoned cart, this is the total cost of shipping - `revenue_discount`: if this conversion is for an ecommerce order or abandoned cart, this is the total discount applied to the order -##### Table details +#### Table details All extra information stored in the table that is not listed above is replicated from the Visit entity this conversion is for. This allows us to avoid joining the `log_visit` table in certain cases. The `index_idsite_datetime` index is used when aggregating conversions. It allows quick access to the conversions that were tracked for a specific website during a specific period and lets us avoid a table scan through the entire table. -#### Ecommerce items (aka conversion items) +### Ecommerce items (aka conversion items) An ecommerce item is an item that was sold in an ecommerce order or abandoned in an abandoned cart. 
@@ -234,13 +223,13 @@ Ecommerce items are stored in the `log_conversion_item` table and consist of the - `quantity`: the amount of this item that were present in the associated ecommerce order - `deleted`: whether this item was removed from the order or not -##### Table details +#### Table details The `idsite`, `idvisitor`, `server_time` and `idvisit` columns are copied from the Conversion entity this Ecommerce Item belongs to. They are copied so we can aggregate Ecommerce Items without having to join other tables. The `index_idsite_servertime` index is used when aggregating ecommerce items. It allows quick access to the items that were tracked for a specific website and during a specific period and lets us avoid a table scan through the entire table. -### Archive data +## Archive data Archive data consists of **metrics** and **reports**. Metrics are numeric values and are stored as such. Reports are stored in [DataTable](/api-reference/Piwik/DataTable) instances and persisted as compressed binary strings. @@ -262,7 +251,7 @@ All archive data will contain the following information: - `ts_archived`: The datetime the archive data was cached. - `value`: Either a numeric value (for a metric) or a binary string (for a report). -#### Table details +### Table details Archive data is stored in tables partitioned by months, and missing tables are created automatically. Reports that aggregate visits from January 2012 will be held in a different table from reports that aggregate visits from February 2012. @@ -278,10 +267,10 @@ In `archive_blob` tables: - the `index_period_archived` index is used in the same way as the one in `archive_numeric` tables - `archive_blob` tables do not have an index that makes it fast to query for rows by site, period and archived date. This is because they should not be queried this way. Instead, the `archive_numeric` table should be queried and the `idarchive` values saved. These values can be used to query data in the `archive_blob` table. 
-### Other data +## Other data -#### Websites (aka sites) +### Websites (aka sites) **Site** entities contain information regarding a website whose visits are tracked. There won't be nearly as many of these as there are visits and archive data entries, but they will be queried often. @@ -310,7 +299,7 @@ Site entities also contain a list of extra URLs that can be used to access the w Site entity data access occurs primarily through the [Piwik\Site](/api-reference/Piwik/Site) class. Anything that cannot be queried through that class can be queried through the [SitesManager](https://github.com/piwik/piwik/tree/master/plugins/SitesManager) core plugin. -#### Goals +### Goals Each site has an optional list of goals. A goal is a desired action that a website visitor should take. @@ -338,7 +327,7 @@ Goals are stored in the `goal` table and contain the following information: _Note: The ecommerce and abandoned cart goals are two special goals with special IDs. Ecommerce websites automatically have these goals._ -#### Users +### Users User entities describe each Piwik user except the Super User. They are persisted in the `users` table. @@ -356,116 +345,25 @@ User data is read on every UI and [Reporting API](/guides/piwiks-reporting-api) There is some user related information that is not stored directly in user entities. They are descirbed below: -#### User permissions +### User permissions Users can be allowed and disallowed access to websites. Piwik persists each user's access level for each website they have access to in the `access` table. To read more about this, read the [Permissions](/guides/permissions) guide. -#### User language choice +### User language choice Piwik will also persist each user's language of choice. User logins are associated with the name of the language (written in the chosen language as opposed to English). 
This association and the persistence logic is implemented by the [LanguagesManager](https://github.com/piwik/piwik/tree/master/plugins/LanguagesManager) plugin. -#### Options +### Options [Options](/api-reference/Piwik/Option) are key-value pairs where the key is a string and the value is a another string (possibly bigger and possibly binary). They are queried on every UI and [Reporting API](/guides/piwiks-reporting-api) request. The tracker will [cache](/guides/all-about-tracking#the-tracker-cache) relevant option values and so will only query options when the cache needs updating. Some options should be loaded on every non-tracking request. These options have a special **autoload** property set to `1`. -## Plugin Persistence - -Plugins can provide persistence for new data if they need to. At the moment, since MySQL is the only supported backend, this means directly adding and using new tables. - -To add new tables to Piwik's MySQL database, execute a `CREATE TABLE` statement in the plugin descriptor's [install](/api-reference/Piwik/Plugin#install) method. For example: - -```php -use Piwik\Db; -use Piwik\Common; -use \Exception; - -public class MyPlugin extends \Piwik\Plugin -{ - // ... - - public function install() - { - try { - $sql = "CREATE TABLE " . Common::prefixTable('mynewtable') . " ( - mykey VARCHAR( 10 ) NOT NULL , - mydata VARCHAR( 100 ) NOT NULL , - PRIMARY KEY ( mykey ) - ) DEFAULT CHARSET=utf8 "; - Db::exec($sql); - } catch (Exception $e) { - // ignore error if table already exists (1050 code is for 'table already exists') - if (!Db::get()->isErrNo($e, '1050')) { - throw $e; - } - } - } - - // ... -} -``` - -Plugins should also clean up after themselves by dropping the tables in the [uninstall](/api-reference/Piwik/Plugin#uninstall) method: - -```php -use Piwik\Db; -use Piwik\Common; -use \Exception; - -public class MyPlugin extends \Piwik\Plugin -{ - // ... 
- - public function uninstall() - { - Db::dropTables(Common::prefixTable('mynewtable')); - } - - // ... -} -``` - -**Note: New tables should be appropriately [prefixed](/api-reference/Piwik/Common#prefixtable).** - -### Augmenting existing tables - -Plugins can also augment existing tables. If, for example, a plugin wanted to track extra visit information, the plugin could add columns to log data tables and set these columns during tracking.This would also be done in the [install](/api-reference/Piwik/Plugin#install) method: - -```php -use Piwik\Db; - -public class MyPlugin extends \Piwik\Plugin -{ - // ... - - public function install() - { - try { - $q1 = "ALTER TABLE `" . Common::prefixTable("log_visit") . "` - ADD `mynewdata` VARCHAR( 100 ) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL AFTER `config_os`,"; - Db::exec($q1); - } catch (Exception $e) { - // ignore column already exists error - if (!Db::get()->isErrNo($e, '1060')) { - throw $e; - } - } - } - - // ... -} -``` - -Plugins should remove the column in the [uninstall](/api-reference/Piwik/Plugin#uninstall) method, **unless doing so take very long time**. Since log tables can have millions and even billions of entries, removing columns from these tables when a plugin is uninstalled would be a bad idea. - - - ## Learn more * To learn **how the tracker inserts log data** see our [All About Tracking](/guides/all-about-tracking) guide. @@ -473,3 +371,4 @@ Plugins should remove the column in the [uninstall](/api-reference/Piwik/Plugin# * To learn **how archive data is cached** see our [All About Analytics](/guides/all-about-analytics-data) guide. * To learn **about Piwik's logging utility** see this section in our [Getting started extending Piwik](/guides/getting-started-part-1) guide. * To learn **about database backed sessions** read [this FAQ entry](http://piwik.org/faq/how-to-install/faq_133/). +* To learn **how plugins can persist data** read the [Plugin Data](/guides/plugin-data) guide. 
diff --git a/docs/plugin-data.md b/docs/plugin-data.md new file mode 100644 index 000000000..707cb18c4 --- /dev/null +++ b/docs/plugin-data.md @@ -0,0 +1,93 @@ +--- +category: Develop +--- +# Plugin Data + +## Adding new tables + +Plugins can provide persistence for new data if they need to. At the moment, since MySQL is the only supported backend, this means directly adding and using new tables. + +To add new tables to Piwik's MySQL database, execute a `CREATE TABLE` statement in the plugin descriptor's [install](/api-reference/Piwik/Plugin#install) method. For example: + +```php +use Piwik\Db; +use Piwik\Common; +use \Exception; + +public class MyPlugin extends \Piwik\Plugin +{ + // ... + + public function install() + { + try { + $sql = "CREATE TABLE " . Common::prefixTable('mynewtable') . " ( + mykey VARCHAR( 10 ) NOT NULL , + mydata VARCHAR( 100 ) NOT NULL , + PRIMARY KEY ( mykey ) + ) DEFAULT CHARSET=utf8 "; + Db::exec($sql); + } catch (Exception $e) { + // ignore error if table already exists (1050 code is for 'table already exists') + if (!Db::get()->isErrNo($e, '1050')) { + throw $e; + } + } + } + + // ... +} +``` + +Plugins should also clean up after themselves by dropping the tables in the [uninstall](/api-reference/Piwik/Plugin#uninstall) method: + +```php +use Piwik\Db; +use Piwik\Common; +use \Exception; + +public class MyPlugin extends \Piwik\Plugin +{ + // ... + + public function uninstall() + { + Db::dropTables(Common::prefixTable('mynewtable')); + } + + // ... +} +``` + +**Note: New tables should be appropriately [prefixed](/api-reference/Piwik/Common#prefixtable).** + +## Augmenting existing tables + +Plugins can also augment existing tables. 
If, for example, a plugin wanted to track extra visit information, the plugin could add columns to log data tables and set these columns during tracking. This would also be done in the [install](/api-reference/Piwik/Plugin#install) method: + +```php +use Piwik\Db; + +public class MyPlugin extends \Piwik\Plugin +{ + // ... + + public function install() + { + try { + $q1 = "ALTER TABLE `" . Common::prefixTable("log_visit") . "` + ADD `mynewdata` VARCHAR( 100 ) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL AFTER `config_os`,"; + Db::exec($q1); + } catch (Exception $e) { + // ignore column already exists error + if (!Db::get()->isErrNo($e, '1060')) { + throw $e; + } + } + } + + // ... +} +``` + +Plugins should remove the column in the [uninstall](/api-reference/Piwik/Plugin#uninstall) method, **unless doing so takes a very long time**. Since log tables can have millions and even billions of entries, removing columns from these tables when a plugin is uninstalled would be a bad idea.