cassandra Example的一点理解及疑问

终于有时间看一下cassandra,记录一下。有很多地方可能理解有误,请大家指点。

package com.demo;

import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;

import org.apache.cassandra.thrift.Cassandra;
import org.apache.cassandra.thrift.Column;
import org.apache.cassandra.thrift.ColumnOrSuperColumn;
import org.apache.cassandra.thrift.ColumnParent;
import org.apache.cassandra.thrift.ColumnPath;
import org.apache.cassandra.thrift.ConsistencyLevel;
import org.apache.cassandra.thrift.InvalidRequestException;
import org.apache.cassandra.thrift.NotFoundException;
import org.apache.cassandra.thrift.SlicePredicate;
import org.apache.cassandra.thrift.SliceRange;
import org.apache.cassandra.thrift.SuperColumn;
import org.apache.cassandra.thrift.TimedOutException;
import org.apache.cassandra.thrift.UnavailableException;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.protocol.TProtocol;
import org.apache.thrift.transport.TSocket;
import org.apache.thrift.transport.TTransport;

/**
 * Walk-through of the Cassandra Thrift client: inserts two columns under one row key,
 * reads a single column back, reads the whole row with a slice, and finally fetches one
 * column for several row keys with multiget.
 */
public class ThriftExample {

	/** Charset name used for every String to byte[] conversion in this example. */
	public static final String UTF8 = "UTF8";

	/**
	 * Runs the demo against a Cassandra node listening on localhost:9160.
	 *
	 * @param args unused
	 * @throws UnsupportedEncodingException if the {@link #UTF8} charset name is not supported
	 * @throws InvalidRequestException declared by the Thrift client calls
	 * @throws UnavailableException declared by the Thrift client calls
	 * @throws TimedOutException declared by the Thrift client calls
	 * @throws TException declared by the Thrift transport/client calls
	 * @throws NotFoundException declared by {@code client.get}
	 */
	public static void main(String[] args) throws UnsupportedEncodingException, InvalidRequestException,
			UnavailableException, TimedOutException, TException, NotFoundException {
		TTransport tr = new TSocket("localhost", 9160);
		TProtocol proto = new TBinaryProtocol(tr);
		Cassandra.Client client = new Cassandra.Client(proto); // the client all calls go through
		tr.open();
		try {
			// A keyspace is roughly a database; usually one per application. It is configured
			// in storage-conf.
			String keyspace = "Keyspace1";

			// A ColumnFamily is roughly a table. It holds many rows, and one row holds any
			// number of columns; the column name and the value both live inside the Column,
			// which is what "column-oriented storage" refers to. All three fields are required:
			// struct Column {
			// 1: binary name,
			// 2: binary value,
			// 3: i64 timestamp,
			// }
			// Each ColumnFamily is stored in its own file.
			String table = "Standard1";
			// Row keys, used to look up a row.
			String keyUserID = "1";
			String keyUserID2 = "2";
			// Timestamp attached to the inserted columns.
			long timestamp = System.currentTimeMillis();

			// A ColumnPath identifies the ColumnFamily (table) a column lives in plus the
			// column name; here the column is "fullName".
			ColumnPath colPathName = new ColumnPath(table);
			colPathName.setColumn("fullName".getBytes(UTF8));

			// Insert one column. The six arguments are:
			// keyspace: the database to write to
			// keyUserID: the row key the data can be found under later
			// colPathName: the column to write (ColumnFamily Standard1, column fullName)
			// value bytes: the value stored in the column
			// timestamp: per the official wiki, used for conflict resolution
			// ConsistencyLevel.ONE: consistency level; per the wiki, ONE means the write reaches
			// the commit log and memtable of at least one node before the client gets a
			// response, and a read returns the data from the first node that answers
			client.insert(keyspace, keyUserID, colPathName, "阿伦·艾".getBytes(UTF8), timestamp,
					ConsistencyLevel.ONE);

			// Second column of the same row: "age".
			ColumnPath colPathAge = new ColumnPath(table);
			colPathAge.setColumn("age".getBytes(UTF8));
			client.insert(keyspace, keyUserID, colPathAge, "24".getBytes(UTF8), timestamp, ConsistencyLevel.ONE);
			// Two columns have been inserted, but only one row, because both share the single
			// key keyUserID: one key maps to a group of columns (or super columns).

			// Read a single column. The arguments mirror insert: keyspace, then row key, then
			// the column path. The call returns a ColumnOrSuperColumn, which carries either
			// a column or a super_column, never both; here only column is used.
			Column col = client.get(keyspace, keyUserID, colPathName, ConsistencyLevel.ONE).column;

			System.out.println("column name: " + new String(col.name, UTF8));
			// Fixed label: this line prints the value, not the name.
			System.out.println("column value: " + new String(col.value, UTF8));
			System.out.println("column timestamp: " + new Date(col.timestamp));

			// Read the whole row: empty start and finish bounds on the slice range.
			SliceRange sliceRange = new SliceRange();
			sliceRange.setStart(new byte[0]);
			sliceRange.setFinish(new byte[0]);
			SlicePredicate predicate = new SlicePredicate();
			predicate.setSlice_range(sliceRange);
			System.out.println("\n读取整行数据:");

			// ColumnParent is used when selecting groups of columns from the same ColumnFamily.
			// In directory structure terms, imagine ColumnParent as ColumnPath + '/../'.
			ColumnParent parent = new ColumnParent(table);
			// get_slice arguments: keyspace, row key, column parent, predicate (the range to
			// query) and consistency level; the result is a List<ColumnOrSuperColumn>.
			List<ColumnOrSuperColumn> results = client.get_slice(keyspace, keyUserID, parent, predicate,
					ConsistencyLevel.ONE);
			for (ColumnOrSuperColumn result : results) {
				Column column = result.column;
				System.out.println(new String(column.name, UTF8) + " -> " + new String(column.value, UTF8));
			}

			// Read the same column ("fullName") for several row keys at once.
			System.out.println("\n读取多行中某列数据:");
			List<String> keys = new ArrayList<String>();
			keys.add(keyUserID2);
			keys.add(keyUserID);
			ColumnPath columnPath = new ColumnPath(table);
			columnPath.setColumn("fullName".getBytes(UTF8));

			Map<String, ColumnOrSuperColumn> resultsMap = client.multiget(keyspace, keys, columnPath,
					ConsistencyLevel.ONE);
			for (Map.Entry<String, ColumnOrSuperColumn> entry : resultsMap.entrySet()) {
				ColumnOrSuperColumn columnOrSuperColumn = entry.getValue();
				Column column = (columnOrSuperColumn == null) ? null : columnOrSuperColumn.column;
				if (column == null) {
					// keyUserID2 is never written by this example, so its entry may carry no
					// column; skip it rather than fail with a NullPointerException.
					// NOTE(review): confirm what the server returns for a missing key.
					System.out.println(entry.getKey() + " : (no column found)");
					continue;
				}
				printColumn(column);
			}

			// To delete the row, a remove call would look like this:
			// ColumnPath removecolumnPath = new ColumnPath(table);
			// client.remove(keyspace, keyUserID, removecolumnPath, timestamp, ConsistencyLevel.ONE);
		} finally {
			// Close the transport even when one of the calls above throws.
			tr.close();
		}
	}

	/**
	 * Prints every column contained in a super column as {@code name : value}.
	 *
	 * @param superColumn the super column whose {@code columns} list is printed
	 */
	public static void printSuperColumn(SuperColumn superColumn) {
		for (Column c : superColumn.columns) {
			printColumn(c);
		}
	}

	/**
	 * Prints one column as {@code name : value}, decoding both byte arrays with {@link #UTF8}.
	 * An unsupported charset is reported via its stack trace instead of being propagated.
	 *
	 * @param c the column to print
	 */
	public static void printColumn(Column c) {
		byte[] name = c.name;
		byte[] value = c.value;
		try {
			System.out.println(new String(name, UTF8) + " : " + new String(value, UTF8));
		} catch (UnsupportedEncodingException e) {
			e.printStackTrace();
		}
	}
}

这只是个简单的demo,还有很多地方我还没想明白,比如SuperColumn,为什么ColumnFamily中只能是SuperColumn或Column而不能同时存在,内在的原理还是没有整明白,
还有查询怎么做,key-value 只能通过key才能拿到value,很多时候我们都是知道了value想找到那个key,这样我们只能把我们要查询的value作为key,要查询的key作为value插入,以备以后查询,这样的话多麻烦啊,有时候查询条件还不确定。
现在想能不能用lucene做索引,不知道有没有人用过。大家谁做过实际项目的出来说说啊。

你可能感兴趣的:(apache,数据结构,c,Lucene,cassandra)