-
Notifications
You must be signed in to change notification settings - Fork 0
Apache Cassandra
Gustavo Armenta edited this page May 16, 2017
·
15 revisions
- Apple has 75,000+ nodes running Cassandra in 2015
- Data replication in same datacenter takes 150ms
- Data replication from US to Asia takes 750ms
- Don't store large files. Cassandra has a 2GB limit per row. In reality, you should test and probably work with 100MB chunk size files.
- Keep partition size under 100MB
- Keep collection columns under 1,000 items
- Create index lookup tables for collection columns
- Create Data Aggregations by doing PK, Year, Month, Day, Hour, Counter, Sum
- Use BATCH to insert entity in many tables
- Use UPDATE-WHERE-IF pattern to access data only once (e.g. reset password scenario)
CREATE KEYSPACE killrvideo WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};
USE killrvideo;
CREATE TABLE videos (
video_id timeuuid,
added_date timestamp,
description text,
title text,
user_id uuid,
PRIMARY KEY ((video_id)));
COPY videos FROM 'labwork/exercise-2/videos.csv' WITH HEADER=true;
SELECT COUNT(*) FROM videos;
SELECT * FROM videos LIMIT 10;
CREATE TABLE videos_by_title_year (
title text,
added_year int,
added_date timestamp,
description text,
user_id uuid,
video_id uuid,
PRIMARY KEY ((title, added_year)));
COPY videos_by_title_year FROM 'labwork/exercise-3/videos_by_title_year.csv' WITH HEADER=true;
SELECT * FROM videos_by_title_year LIMIT 10;
SELECT * FROM videos_by_title_year WHERE title='Chicago Demo';
SELECT * FROM videos_by_title_year WHERE title='Chicago Demo' AND added_year=2015;
CREATE TABLE videos_by_tag_year (
tag text,
added_year int,
video_id timeuuid,
added_date timestamp,
description text,
title text,
user_id uuid,
PRIMARY KEY ((tag), added_year, video_id)
) WITH CLUSTERING ORDER BY (added_year desc, video_id asc);
COPY videos_by_tag_year FROM 'labwork/exercise-4/videos_by_tag_year.csv' WITH HEADER=true;
SELECT * FROM videos_by_tag_year LIMIT 10;
DROP TABLE videos;
CREATE TABLE videos (
video_id timeuuid,
added_date timestamp,
description text,
tags set<text>,
title text,
user_id uuid,
PRIMARY KEY((video_id)));
COPY videos FROM 'labwork/exercise-5/videos.csv' WITH HEADER=true;
CREATE TYPE video_encoding (
bit_rates set<text>,
encoding text,
height int,
width int);
ALTER TABLE videos ADD encoding frozen<video_encoding>;
COPY videos (video_id, encoding) FROM 'labwork/exercise-5/videos_encoding.csv' WITH HEADER=true;
SELECT * FROM videos LIMIT 10;
CREATE TABLE videos_count_by_tag (
video_count counter,
tag text,
added_year int,
PRIMARY KEY (tag, added_year));
SOURCE 'labwork/exercise-6/videos_count_by_tag.cql';
SELECT * FROM videos_count_by_tag;