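"""
test.py - evaluation script for a Ghostbuster-style AI-text detector
(forked from asheeshts/ghostbuster).

Handcrafted features over Llama, trigram, and unigram per-token
probabilities are fed to a pre-trained logistic regression classifier,
which is then evaluated on a DetectRL benchmark file of paired
human-written abstracts and LLM-generated texts.
"""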
import json
from collections import defaultdict
from pdb import set_trace

import numpy as np
import dill as pickle
import torch
import torch.nn.functional as F
import tqdm
from nltk.corpus import brown
from nltk.util import ngrams
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from transformers import AutoTokenizer, AutoModelForCausalLM

from utils.featurize import convert_file_to_logprob_file, get_logprobs, normalize
from utils.n_gram import TrigramBackoff
from utils.symbolic import vec_functions, scalar_functions
# Initialize the tokenizer and the causal LM
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-3B-Instruct").to(device)
model.eval()  # inference only: disable dropout etc.

# Train the trigram backoff model on the Brown corpus,
# tokenized with the same tokenizer as the LM
sentences = brown.sents()
tokenized_corpus = []
for sentence in tqdm.tqdm(sentences):
    tokens = tokenizer(" ".join(sentence))["input_ids"]
    tokenized_corpus += tokens
trigram = TrigramBackoff(tokenized_corpus)
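# Interface assumed from utils.n_gram (not shown here):
# trigram.n_gram_probability(ngram) returns a smoothed P(w_n | w_1..w_{n-1}),
# and trigram.base is the underlying unigram model with the same method, e.g.:
#   p = trigram.n_gram_probability((tok1, tok2, tok3))  # hypothetical token ids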
# Define the named probability vectors and all binary combinations of them
vectors = ["llama-logprobs", "unigram-logprobs", "trigram-logprobs"]
vec_combinations = defaultdict(list)
for vec1 in range(len(vectors)):
    for vec2 in range(vec1):
        for func in vec_functions:
            if func != "v-div":
                vec_combinations[vectors[vec1]].append(
                    f"{func} {vectors[vec2]}")
for vec1 in vectors:
    for vec2 in vectors:
        if vec1 != vec2:
            vec_combinations[vec1].append(f"v-div {vec2}")
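# e.g. vec_combinations["trigram-logprobs"] contains entries such as
# "v-add unigram-logprobs" and "v-div llama-logprobs": the symmetric ops
# are added once per unordered pair, while v-div is kept directional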
def get_words(exp):
    """
    Splits an expression into whitespace-separated words for processing.
    """
    return exp.split(" ")
def backtrack_functions(max_depth=2):
    """
    Enumerate all candidate feature expressions up to max_depth vector
    operations, each terminated by a scalar reduction.
    """
    def helper(prev, depth):
        if depth >= max_depth:
            return []
        all_funcs = []
        prev_word = get_words(prev)[-1]
        for func in scalar_functions:
            all_funcs.append(f"{prev} {func}")
        for comb in vec_combinations[prev_word]:
            all_funcs += helper(f"{prev} {comb}", depth + 1)
        return all_funcs

    ret = []
    for vec in vectors:
        ret += helper(vec, 0)
    return ret
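# Example expressions this enumerates (same grammar as best_features below):
#   "llama-logprobs s-avg"
#   "unigram-logprobs v-add llama-logprobs s-max"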
def score_ngram(doc, model, tokenizer, n=3):
    """
    Returns the vector of n-gram probabilities for a document, given an
    n-gram model and a tokenizer (a callable returning token ids).
    """
    scores = []
    # For unigrams, drop the BOS token; for trigrams, left-pad so every
    # token gets a score (token id 2 is the pad, as in the original code)
    tokens = (
        tokenizer(doc.strip())[1:] if n == 1
        else (n - 2) * [2] + tokenizer(doc.strip())
    )
    for i in ngrams(tokens, n):
        scores.append(model.n_gram_probability(i))
    return np.array(scores)
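# e.g. for a document whose tokenization has L ids (including BOS), both the
# n=1 and n=3 calls return L - 1 scores, aligned with the LM probabilities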
def get_all_logprobs(
    generate_dataset,
    preprocess=lambda x: x.strip(),
    verbose=True,
    trigram=None,
    tokenizer=None,
    num_tokens=2047,
):
    """
    Loads per-file LM, trigram, and unigram probability vectors for a
    file-based dataset (the LM scores come from precomputed logprob files).
    """
    llama_logprobs = {}
    trigram_logprobs, unigram_logprobs = {}, {}
    if verbose:
        print("Loading logprobs into memory")
    file_names = generate_dataset(lambda file: file, verbose=False)
    to_iter = tqdm.tqdm(file_names) if verbose else file_names
    for file in to_iter:
        if "logprobs" in file:
            continue
        with open(file, "r") as f:
            doc = preprocess(f.read())
        llama_logprobs[file] = get_logprobs(
            convert_file_to_logprob_file(file, "llama-13b")
        )[:num_tokens]
        trigram_logprobs[file] = score_ngram(
            doc, trigram, tokenizer, n=3)[:num_tokens]
        unigram_logprobs[file] = score_ngram(
            doc, trigram.base, tokenizer, n=1)[:num_tokens]
    return llama_logprobs, trigram_logprobs, unigram_logprobs
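# Note: get_all_logprobs is retained from the file-based Ghostbuster pipeline
# but is not called in this script; the single-text path below
# (get_logprobs_for_text) is what predict_text uses.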
all_funcs = backtrack_functions(max_depth=3)
np.random.seed(0)
# The pre-selected feature expressions the trained classifier expects
best_features = [
    "trigram-logprobs v-add unigram-logprobs v-< llama-logprobs s-var",
    "llama-logprobs v-div unigram-logprobs v-div llama-logprobs s-l2",
    "unigram-logprobs v-add llama-logprobs s-max",
    "trigram-logprobs v-add unigram-logprobs v-> llama-logprobs s-avg-top-25",
    "unigram-logprobs v-> llama-logprobs v-div unigram-logprobs s-var",
    "unigram-logprobs v-div trigram-logprobs v-sub llama-logprobs s-min",
    "unigram-logprobs v-mul llama-logprobs s-avg",
    "unigram-logprobs v-sub llama-logprobs s-avg",
    "trigram-logprobs v-mul unigram-logprobs v-div trigram-logprobs s-var",
    "trigram-logprobs v-add llama-logprobs s-var",
    "trigram-logprobs v-sub unigram-logprobs v-> llama-logprobs s-l2",
    "trigram-logprobs v-mul unigram-logprobs v-> llama-logprobs s-avg",
    "unigram-logprobs v-< llama-logprobs v-div trigram-logprobs s-var",
]
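# For reference, a minimal sketch of the interface this script assumes
# utils.symbolic provides (the real definitions live in that module and
# their exact edge-case handling may differ):
#
#   vec_functions = {
#       "v-add": lambda a, b: a + b,   # elementwise ops over equal-length
#       "v-sub": lambda a, b: a - b,   # probability vectors; "v-<"/"v->"
#       ...                            # are elementwise comparisons
#   }
#   scalar_functions = {
#       "s-avg": np.mean, "s-var": np.var, "s-l2": np.linalg.norm, ...
#   }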
# Build a reverse vocabulary map (token id -> token string)
vocab_map = {}
vocab = tokenizer.vocab
for token in vocab:
    idx = vocab[token]
    vocab_map[idx] = token
def get_logprobs_direct(text):
    """
    Returns the per-token probabilities the model assigns to `text`.
    Despite the name, the values are probabilities (exp(-NLL)), matching
    what the file-based get_logprobs returns.
    """
    with torch.no_grad():
        encodings = tokenizer(text, return_tensors="pt").to(device)
        probs = F.softmax(model(encodings["input_ids"]).logits, dim=2)
        tokens = encodings["input_ids"]
        # Subword strings, for inspection/debugging (not used downstream)
        subwords = [vocab_map[int(idx)]
                    for idx in encodings["input_ids"][0][1:]]
        # For each position, gather the probability of the actual next token
        indices = tokens[:, 1:].unsqueeze(-1)
        subprobs = (
            torch.gather(probs[:, :-1, :], dim=2, index=indices)
            .flatten()
            .cpu()
            .numpy()
        )
    return subprobs
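# e.g. get_logprobs_direct("some text") -> np.ndarray of length L - 1,
# one probability per token after BOS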
def get_logprobs_for_text(
    text,
    preprocess=lambda x: x.strip(),
    verbose=True,
    trigram=None,
    tokenizer=None,
    num_tokens=2047,
):
    """
    Computes the LM, trigram, and unigram probability vectors for a single
    raw text, each truncated to num_tokens entries.
    """
    if verbose:
        print("Calculating logprobs for the provided text")
    # Preprocess the text
    doc = preprocess(text)
    # Calculate the three probability vectors
    llama_logprobs = get_logprobs_direct(doc)[:num_tokens]
    trigram_logprobs = score_ngram(doc, trigram, tokenizer, n=3)[:num_tokens]
    unigram_logprobs = score_ngram(
        doc, trigram.base, tokenizer, n=1)[:num_tokens]
    return llama_logprobs, trigram_logprobs, unigram_logprobs
def get_exp_featurize_for_text(best_features, vector_map):
    """
    Builds a featurizer that evaluates each feature expression left to
    right: vector ops combine vectors, and the final scalar op reduces
    the running vector to a single feature value.
    """
    def calc_features(text, exp):
        exp_tokens = get_words(exp)
        curr = vector_map[exp_tokens[0]](text)
        for i in range(1, len(exp_tokens)):
            if exp_tokens[i] in vec_functions:
                next_vec = vector_map[exp_tokens[i + 1]](text)
                curr = vec_functions[exp_tokens[i]](curr, next_vec)
            elif exp_tokens[i] in scalar_functions:
                return scalar_functions[exp_tokens[i]](curr)

    def exp_featurize(text):
        return np.array([calc_features(text, exp) for exp in best_features])

    return exp_featurize
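# Usage sketch (mirrors predict_text below):
#   featurize = get_exp_featurize_for_text(best_features, vector_map)
#   features = featurize(text)  # shape: (len(best_features),)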
def predict_text(text):
    """
    Featurizes a single text and classifies it as GPT (1) or Human (0).
    """
    # Compute the per-token probability vectors
    llama_logprobs, trigram_logprobs, unigram_logprobs = get_logprobs_for_text(
        text,
        verbose=True,
        tokenizer=lambda x: tokenizer(x)["input_ids"],
        trigram=trigram,
        num_tokens=2047,
    )
    # Map vector names to the corresponding precomputed vectors
    vector_map = {
        "llama-logprobs": lambda file: llama_logprobs,
        "trigram-logprobs": lambda file: trigram_logprobs,
        "unigram-logprobs": lambda file: unigram_logprobs,
    }
    # Load the trained classifier and feature normalization statistics
    # (reloaded on every call; hoist these if throughput matters)
    with open("llama_model.pkl", "rb") as f:
        logistic_model = pickle.load(f)
    with open("mu", "rb") as f:
        mu = pickle.load(f)
    with open("sigma", "rb") as f:
        sigma = pickle.load(f)
    data = get_exp_featurize_for_text(best_features, vector_map)(text)
    data = normalize(data.reshape(1, -1), mu, sigma)
    # Predict with the logistic regression model
    prediction = logistic_model.predict(data)[0]
    probability = logistic_model.predict_proba(data)[0][0]  # P(class 0) = P(Human)
    print(f"P(Human): {probability:.4f}")
    result = "GPT" if prediction == 1 else "Human"
    print(f"Prediction: {result}")
    return prediction
# Example usage
# text = """
# Humanity is a tapestry woven from countless threads of culture, history, experience, and aspiration. At its core, it represents the collective existence of human beings, encompassing the shared traits and unique diversities that define our species. Within the broad spectrum of human identity, there exists a complex interplay of emotions, intellect, and spirit that propels us toward progress, yet often pulls us into conflict.
# One of the most commendable aspects of humanity is our capacity for empathy. This intrinsic ability to understand and share the feelings of others forms the bedrock of our social interactions. It has fueled efforts to build communities, foster relationships, and, on a larger scale, strive for global cooperation. Through empathy, we forge connections that transcend geographical, cultural, and ideological boundaries. This is evident in humanitarian efforts and the universal reaction to crises, where individuals unite to offer support and aid to those in need, reminding us of our inherently compassionate nature.
# Yet, the narrative of humanity is not without its darker chapters. Our history is marred by conflict, prejudice, and an insatiable appetite for power and resources. These elements have often led to the exploitation of others and the environment, posing significant challenges to our collective well-being. The residue of these actions lingers in modern issues such as inequality, systemic injustice, and environmental degradation. Nevertheless, these challenges have also driven us to question, to learn, and to evolve in our understanding of justice and equality.
# Innovation and creativity stand as testaments to the human spirit's resilience and ingenuity. From the creation of the wheel to the mapping of the human genome, humanity's pursuit of knowledge and improvement has been relentless. This drive has propelled technological advancements that revolutionize our lives and expand our capabilities. Moreover, the arts—encompassing literature, music, and visual media—reflect and critique our human experience, offering insights into our condition and exploring existential themes that resonate across time and space.
# Throughout history, humanity has been on a quest for meaning, seeking to understand our place in the universe. Philosophies and religions have emerged as frameworks that guide our ethics and philosophies, providing solace and direction amidst the uncertainties of life. These belief systems underscore a recurring theme in the human narrative: the search for truth and enlightenment.
# In today’s globalized world, the notion of humanity is increasingly vital as it encourages us to cherish diversity while recognizing our shared destiny. The intertwining of cultures and the instantaneous exchange of ideas through digital connectivity offer unprecedented opportunities for collaboration. However, this interconnectedness also demands a conscientious approach to ensuring that our actions contribute positively to the global community.
# In essence, humanity embodies both the extraordinary and the ordinary facets of life. It captures our potential to transcend individual limitations in the pursuit of collective betterment. As we navigate the complexities of the modern world, the essence of humanity lies in a delicate balance: recognizing our flaws, cultivating our strengths, and, most importantly, holding onto hope. The enduring challenge is to harness the richness of our diversity to build a future where equity, peace, and prosperity are not mere ideals but tangible realities.
# """
# text = """
# This study presents a comprehensive calculation of prompt diphoton production cross sections at the Tevatron and Large Hadron Collider (LHC) energies. The prompt diphoton process, wherein two photons are produced directly in the hard scattering process, is an essential channel for probing the properties of the Higgs boson and searching for new physics phenomena. To accurately determine the cross sections, we employ state-of-the-art theoretical frameworks incorporating next-to-leading-order (NLO) QCD corrections and parton shower effects. We systematically analyze the energy dependence of the cross sections, examining the impact of various kinematic cuts and isolation criteria commonly used in experimental analyses. By comparing the results with available experimental data, we validate our theoretical predictions and provide useful insights into the sources of theoretical uncertainties. Our calculations serve as valuable references for the design and interpretation of diphoton measurements at both the Tevatron and LHC, crucial for understanding the underlying physics processes and guiding future experimental endeavors in high-energy physics.
# """
# prediction = predict_text(text)
# print(f"Prediction: {'GPT' if prediction == 1 else 'Human'}")
# Evaluate on the DetectRL arxiv benchmark: each item pairs a human-written
# abstract with an LLM-generated counterpart ("direct_prompt")
path = "/home/asheesh/Documents/beast/ai-detector-survey/DetectRL/Benchmark/Original_Dataset/arxiv_2800.json"
with open(path, "r") as f:
    data = json.load(f)

predictions = []
ground_truth = []
for item in tqdm.tqdm(data):
    abstract = item["abstract"]
    direct_prompt = item["direct_prompt"]
    # Human-written abstract: ground-truth label 0
    prediction = predict_text(abstract)
    predictions.append(prediction)
    ground_truth.append(0)
    # LLM-generated counterpart: ground-truth label 1
    prediction_direct = predict_text(direct_prompt)
    predictions.append(prediction_direct)
    ground_truth.append(1)
# Report metrics over the pooled human/GPT predictions
conf_matrix = confusion_matrix(ground_truth, predictions)
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_report(ground_truth,
                            predictions, target_names=['Human', 'GPT']))
accuracy = accuracy_score(ground_truth, predictions)
print(f"\nAccuracy: {accuracy:.4f}")
set_trace()  # drop into the debugger for interactive inspection of results