-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
250 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
package jaglion; | ||
|
||
import java.io.IOException; | ||
import java.util.UUID; | ||
|
||
import org.apache.commons.codec.digest.DigestUtils; | ||
|
||
import org.apache.pig.EvalFunc; | ||
import org.apache.pig.data.Tuple; | ||
|
||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.hbase.HBaseConfiguration; | ||
import org.apache.hadoop.hbase.HTableDescriptor; | ||
import org.apache.hadoop.hbase.HColumnDescriptor; | ||
import org.apache.hadoop.hbase.MasterNotRunningException; | ||
import org.apache.hadoop.hbase.ZooKeeperConnectionException; | ||
import org.apache.hadoop.hbase.client.HBaseAdmin; | ||
import org.apache.hadoop.hbase.client.HTable; | ||
import org.apache.hadoop.hbase.client.Put; | ||
import org.apache.hadoop.hbase.util.Bytes; | ||
|
||
public class ANONYMIZE extends EvalFunc<String> | ||
{ | ||
public Configuration config; | ||
public String fmap = "FowardMap"; | ||
public String rmap = "ReversMap"; | ||
|
||
public String exec(Tuple input) throws IOException { | ||
|
||
// As long as we have a reasonable inputs | ||
if (input.size() != 2 || input.get(0) == null || input.get(1) == null) | ||
return null; | ||
|
||
// Connect to HBASE | ||
config = HBaseConfiguration.create(); | ||
HBaseAdmin hba = new HBaseAdmin(config); | ||
HTableDescriptor tableDescriptor = null; | ||
HColumnDescriptor columnDescriptor = null; | ||
|
||
// Check and optionally create the forward mapping table | ||
if (hba.tableExists(fmap) == false) { | ||
tableDescriptor = new HTableDescriptor(fmap); | ||
columnDescriptor = new HColumnDescriptor("AnonymousValue"); | ||
tableDescriptor.addFamily(columnDescriptor); | ||
hba.createTable(tableDescriptor); | ||
} | ||
|
||
// Check and optionally create the reverse map table | ||
if (hba.tableExists(rmap) == false) { | ||
tableDescriptor = new HTableDescriptor(rmap); | ||
columnDescriptor = new HColumnDescriptor("ActualValue"); | ||
tableDescriptor.addFamily(columnDescriptor); | ||
hba.createTable(tableDescriptor); | ||
} | ||
|
||
try { | ||
// Read inputs | ||
String id = (String)input.get(0); | ||
int unique = (int)input.get(1); | ||
System.out.println("Value: " + id + " Unique: " + unique); | ||
Put p = null; | ||
|
||
// Generate value for input id | ||
String uuid = UUID.randomUUID().toString(); | ||
String hash = DigestUtils.shaHex(id); | ||
|
||
System.out.println("UUID: " + uuid + " Hash: " + hash); | ||
|
||
// Store the value forward and backward | ||
HTable forward = new HTable(config, fmap); | ||
HTable backward = new HTable(config, rmap); | ||
|
||
// Forward Map | ||
p = new Put(Bytes.toBytes(id)); | ||
p.add(Bytes.toBytes("AnonymousValue"), Bytes.toBytes("hash"), Bytes.toBytes(hash)); | ||
p.add(Bytes.toBytes("AnonymousValue"), Bytes.toBytes("uuid"), Bytes.toBytes(uuid)); | ||
forward.put(p); | ||
|
||
System.out.println("Stored forward map."); | ||
|
||
// Reverse Map - Hash | ||
p = new Put(Bytes.toBytes(hash)); | ||
p.add(Bytes.toBytes("ActualValue"), Bytes.toBytes(""), Bytes.toBytes(id)); | ||
backward.put(p); | ||
|
||
System.out.println("Stored reverse map - hash."); | ||
|
||
// Reverse Map - UUID | ||
p = new Put(Bytes.toBytes(uuid)); | ||
p.add(Bytes.toBytes("ActualValue"), Bytes.toBytes(""), Bytes.toBytes(id)); | ||
backward.put(p); | ||
|
||
System.out.println("Stored reverse map - uuid."); | ||
|
||
// If the request was for a unique value return the uuid | ||
if (unique == 1) { | ||
return uuid; | ||
} else { | ||
// otherwise return the hashed value | ||
return hash; | ||
} | ||
} catch(Exception e) { | ||
System.out.println(input); | ||
throw new IOException("Caught exception processing input row ", e); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
package jaglion; | ||
|
||
import java.io.IOException; | ||
import java.util.UUID; | ||
|
||
import org.apache.commons.codec.digest.DigestUtils; | ||
|
||
import org.apache.pig.EvalFunc; | ||
import org.apache.pig.data.Tuple; | ||
|
||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.hbase.HBaseConfiguration; | ||
import org.apache.hadoop.hbase.HTableDescriptor; | ||
import org.apache.hadoop.hbase.HColumnDescriptor; | ||
import org.apache.hadoop.hbase.MasterNotRunningException; | ||
import org.apache.hadoop.hbase.ZooKeeperConnectionException; | ||
import org.apache.hadoop.hbase.client.HBaseAdmin; | ||
import org.apache.hadoop.hbase.client.HTable; | ||
import org.apache.hadoop.hbase.client.Get; | ||
import org.apache.hadoop.hbase.client.Result; | ||
import org.apache.hadoop.hbase.util.Bytes; | ||
|
||
public class DEANONYMIZE extends EvalFunc<String> | ||
{ | ||
public Configuration config; | ||
public String rmap = "ReversMap"; | ||
|
||
public String exec(Tuple input) throws IOException { | ||
|
||
// Check inputs to ensure there's only one value coming in and that it's not null | ||
if (input == null || input.size() != 1 || input.get(0) == null) | ||
return null; | ||
|
||
// Connect to the HBase database | ||
config = HBaseConfiguration.create(); | ||
HBaseAdmin hba = new HBaseAdmin(config); | ||
HTableDescriptor tableDescriptor = null; | ||
HColumnDescriptor columnDescriptor = null; | ||
|
||
// Ensure the reverse table exists | ||
if (hba.tableExists(rmap) == false) { | ||
throw new IOException("Value not found to map."); | ||
} | ||
|
||
try { | ||
String code = (String)input.get(0); | ||
String original = null; | ||
|
||
// The backward map table | ||
HTable backward = new HTable(config, rmap); | ||
|
||
// Retrieve the value from the reverse map | ||
Get g = new Get(Bytes.toBytes(code)); | ||
Result r = backward.get(g); | ||
byte[] value = r.getValue(Bytes.toBytes("ActualValue"), Bytes.toBytes("")); | ||
original = Bytes.toString(value); | ||
|
||
return original; | ||
} catch(Exception e) { | ||
System.out.println(input); | ||
throw new IOException("Caught exception processing input row ", e); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
-- anonymize.pig
--
-- Replaces the first comma-separated field of every input line with its
-- anonymized token and passes the remainder of the line through unchanged.

-- -- Hadoop / HBase / Zookeeper
REGISTER /usr/lib/zookeeper/zookeeper.jar;
REGISTER /usr/lib/hadoop/lib/commons-codec-1.4.jar;
REGISTER /usr/lib/hbase/hbase-client.jar;
REGISTER /usr/lib/hbase/hbase-common.jar;
REGISTER /usr/lib/hbase/hbase-protocol.jar;
REGISTER /usr/lib/hbase/hbase-hadoop-compat.jar;
REGISTER /usr/lib/hbase/lib/htrace-core.jar;

-- -- Our UDF
REGISTER bin/jaglion.jar;

-- Load the data with variable length records
-- pass $input to script as -param input='filename'
-- load each line as a character array
A = LOAD '$input' USING TextLoader AS (line:chararray);

-- Split each line into first, rest
SPLT = FOREACH A GENERATE STRSPLIT($0, ',', 2) AS A1;

-- For each row in the data, anonymize the first column, use the rest as is.
-- ANONYMIZE takes (value, unique): 0 returns the SHA hash of the value,
-- 1 returns a freshly generated UUID.
B = FOREACH SPLT GENERATE jaglion.ANONYMIZE(TRIM(A1.$0), 0), A1.$1;

-- Store the anonymized results out to a file
-- pass $output to script as -param output='filename'
STORE B INTO '$output' using PigStorage(',');
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
-- deanonymize.pig
--
-- Reads anonymized, comma-separated records, maps the first field of each
-- record back to its original value and writes the records out again.

-- -- Runtime dependencies: Zookeeper, commons-codec, HBase client stack
REGISTER /usr/lib/zookeeper/zookeeper.jar;
REGISTER /usr/lib/hadoop/lib/commons-codec-1.4.jar;
REGISTER /usr/lib/hbase/hbase-client.jar;
REGISTER /usr/lib/hbase/hbase-common.jar;
REGISTER /usr/lib/hbase/hbase-protocol.jar;
REGISTER /usr/lib/hbase/hbase-hadoop-compat.jar;
REGISTER /usr/lib/hbase/lib/htrace-core.jar;

-- -- The jar providing jaglion.DEANONYMIZE
REGISTER bin/jaglion.jar;

-- Read every line of the anonymized input as a single chararray.
-- The input location is supplied with: -param input='filename'
raw = LOAD '$input' USING TextLoader AS (line:chararray);

-- Cut each line at the first comma: (first field, remainder of line)
pieces = FOREACH raw GENERATE STRSPLIT($0, ',', 2) AS parts;

-- Restore the original value of the first field; the remainder is passed through
restored = FOREACH pieces GENERATE jaglion.DEANONYMIZE(TRIM(parts.$0)), parts.$1;

-- Write the restored records as comma-separated text.
-- The output location is supplied with: -param output='filename'
STORE restored INTO '$output' USING PigStorage(',');
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
-- test.pig
--
-- Smoke test: anonymizes every line of 'testdata' in hash mode and dumps the result.
REGISTER /usr/lib/zookeeper/zookeeper.jar;
-- commons-codec supplies DigestUtils, which ANONYMIZE uses for hashing
REGISTER /usr/lib/hadoop/lib/commons-codec-1.4.jar;
REGISTER /usr/lib/hbase/hbase-client.jar;
REGISTER /usr/lib/hbase/hbase-common.jar;
REGISTER /usr/lib/hbase/hbase-protocol.jar;
REGISTER /usr/lib/hbase/hbase-hadoop-compat.jar;
REGISTER /usr/lib/hbase/lib/htrace-core.jar;

REGISTER bin/jaglion.jar;
-- Declare the column as chararray: without a schema Pig hands the UDF a
-- bytearray, which the UDF's String cast cannot handle.
A = LOAD 'testdata' AS (value:chararray);
DUMP A;
B = FOREACH A GENERATE jaglion.ANONYMIZE($0, 0);
DUMP B;
-- C = FOREACH A GENERATE jaglion.ANONYMIZE($0, 1);
-- DUMP C;
-- D = FOREACH B GENERATE jaglion.DEANONYMIZE($0);
-- DUMP D;
-- E = FOREACH C GENERATE jaglion.DEANONYMIZE($0);
-- DUMP E;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
Test-String | ||
Test2 | ||
IRJ | ||
aaabbb |