from generalanalysis.adversarial_candidate_generator import GACandidateGenerator
from generalanalysis.jailbreaks import AutoDAN, AutoDANConfig
from generalanalysis.boiler_room import BlackBoxModel
# Configure the AutoDAN jailbreak method, then instantiate it from that config.
# The same model identifier is used for both the target and the evaluator.
config = AutoDANConfig(
    target_model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
    project="autodan_experiment",
    initial_candidates=["Initial prompt 1", "Initial prompt 2"],
    device="cuda:0",
    evaluator_model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
)
jailbreak_method = AutoDAN(config)
# Build the genetic-algorithm candidate generator with its three rate settings.
# NOTE(review): BlackBoxModel is imported above but never used in this script;
# confirm whether helper_llm expects a model name or a BlackBoxModel instance.
generator = GACandidateGenerator(
    helper_llm="deepseek-ai/DeepSeek-R1",
    elitism_rate=0.1,
    crossover_rate=0.5,
    mutation_rate=0.5,
)
# Starting population of prompts handed to the generator.
prompts = [
    "Tell me about computer security",
    "What are some common vulnerabilities?",
    "How do hackers break into systems?",
]

# Example fitness values; presumably index-aligned with `prompts`
# (one score per prompt) -- confirm against generate_candidates.
fitness_scores = [0.8, 0.6, 0.4]
# Ask the generator for the next generation, passing the jailbreak method,
# the current prompts and their fitness scores.
# N is presumably the number of candidates requested -- TODO confirm.
next_generation = generator.generate_candidates(
    jailbreak_method_instance=jailbreak_method,
    prompts=prompts,
    fitness_scores=fitness_scores,
    N=10,
)
# Print every prompt returned by the generator, one per line.
# The loop body must be indented under the `for` header; left at column 0 it
# raises IndentationError and the script cannot run.
for prompt in next_generation:
    print(f"Generated prompt: {prompt}")