Skip to content

Commit

Permalink
Merge pull request #6 from TianyiQ/main
Browse files Browse the repository at this point in the history
feat: add support for API-based benchmarking
  • Loading branch information
TianyiQ authored Sep 26, 2024
2 parents 5d8dd9f + 5b934a9 commit e1f7a8b
Show file tree
Hide file tree
Showing 28 changed files with 249 additions and 349 deletions.
35 changes: 35 additions & 0 deletions __init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from benchmark.framework import JudgeBase, ExamineeBase
from benchmark.dummies import DummyJudge
from challenges.follow import FollowJudge
from challenges.predict import PredictJudge
from challenges.coevolve import CoevolveJudge

from algorithms.lifelong_dpo import LifelongDPOExaminee
from algorithms.lifelong_rlhf import LifelongRLHFExaminee
from algorithms.extrapolative_dpo import ExtrapolativeDPOExaminee
from algorithms.extrapolative_rlhf import ExtrapolativeRLHFExaminee
from benchmark.dummies import DummyExaminee

from run_benchmark import run_benchmark

from src.abstractions.model import Model, fill_in_QA_template
from src.abstractions.data import Data, DataFileCollection

# Public API of the package: the names exported by a wildcard import.
# Every entry below is bound by one of the import statements earlier in this file.
__all__ = [
# Benchmark entry point (run_benchmark module)
"run_benchmark",
# Model and data abstractions (src.abstractions)
"Model",
"Data",
"DataFileCollection",
# Base classes for judges and examinees (benchmark.framework)
"JudgeBase",
"ExamineeBase",
# Judges (benchmark.dummies and challenges.*)
"DummyJudge",
"FollowJudge",
"PredictJudge",
"CoevolveJudge",
# Examinees (benchmark.dummies and algorithms.*)
"DummyExaminee",
"LifelongDPOExaminee",
"LifelongRLHFExaminee",
"ExtrapolativeDPOExaminee",
"ExtrapolativeRLHFExaminee",
# Helper function imported from src.abstractions.model
"fill_in_QA_template",
]
21 changes: 11 additions & 10 deletions benchmark/framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,21 @@ class JudgeBase(ABC):
Each judge class corresponds to a challenge.
"""

"""Evaluation results"""
# Evaluation results
examinee_model_history: List[Model]
judge_model_history: List[Model]
eval_times: int

"""Query statistics"""
# Query statistics
query_times: int
query_total_length: int

"""Current model"""
# Information about current model
current_model: Model
current_timestep: int
model_size: int

"""Information determined at initialization"""
# Information determined at initialization
instance_id: str
model_list: List[Model]
template_type: Literal["alpaca", "mistral"]
Expand Down Expand Up @@ -143,7 +143,7 @@ def query_from_examinee(
"""
This method is called by the examinee to query the judge, which the judge will answer according to human preferences at the current timestep.
The examinee will use this information to learn about the latest human preference, and update its language model accordingly.
The base class implementation answers the prompt by directly querying `self.current_model``
The base class implementation answers the prompt by directly querying `self.current_model`.
You could either call the base class implementation in your subclass's implementation (possibly supplying a different `model`),
or override it if necessary.
"""
Expand Down Expand Up @@ -283,16 +283,16 @@ class ExamineeBase(ABC):
In most cases, you need to re-implement most or all of the methods in your subclass. Base implementations are only provided as an example.
"""

"""Current model"""
# Information about current model
current_model: Model
current_timestep: int
template_type: Literal["alpaca", "mistral"]

"""Information determined at initialization"""
# Information determined at initialization
instance_id: str
checkpoint_id: str

"""Query statistics"""
# Query statistics
query_times: int

def __init__(self, **kwargs):
Expand Down Expand Up @@ -431,8 +431,9 @@ def run(self, judge: JudgeBase) -> Iterable:
Every iteration corresponds to the passing of a timestep.
In this way, the judge can control the pause and resume of the examinee.
At every iteration:
1. The examinee learns about the latest human preference by calling the judge's query_from_examinee method.
2. After it has updated its language model, it yields control back to the judge and allows the judge to evaluate it (by calling query_from_judge).
1. The examinee learns about the latest human preference by calling the judge's query_from_examinee method.
2. After it has updated its language model, it yields control back to the judge and allows the judge to evaluate it (by calling query_from_judge).
Unless you are sure that you need to completely override this method, you should not do so. Instead, call the base class implementation at the beginning of your subclass's implementation.
"""

Expand Down
1 change: 0 additions & 1 deletion challenges/follow.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ def eval_snapshot(
super().eval_snapshot(examinee)

def tick(self) -> None:
"""move one timestep forward, without changing the examinee."""
super().tick()

def query_from_examinee(
Expand Down
Binary file modified doc_generation/build/doctrees/Data.doctree
Binary file not shown.
Binary file modified doc_generation/build/doctrees/Examinee.doctree
Binary file not shown.
Binary file modified doc_generation/build/doctrees/Judge.doctree
Binary file not shown.
Binary file modified doc_generation/build/doctrees/Model.doctree
Binary file not shown.
Binary file modified doc_generation/build/doctrees/running.doctree
Binary file not shown.
Binary file modified doc_generation/build/doctrees/usage.doctree
Binary file not shown.
4 changes: 2 additions & 2 deletions doc_generation/source/running.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Running the benchmark
=====================
Quickstart: Running the benchmark
=================================

Requirements
------------
Expand Down
4 changes: 2 additions & 2 deletions doc_generation/source/usage.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Implementing your own experiment
================================
Quickstart: Implementing your own experiment
============================================

To run the benchmark for your own alignment algorithm and assess its temporal alignment
abilities, you have to implement your own algorithm as a subclass of :class:`benchmark.framework.ExamineeBase`. Implement it
Expand Down
43 changes: 27 additions & 16 deletions docs/Data.html
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,8 @@
</form>
<div id="searchbox"></div><div class="sidebar-scroll"><div class="sidebar-tree">
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="running.html">Running the benchmark</a></li>
<li class="toctree-l1"><a class="reference internal" href="usage.html">Implementing your own experiment</a></li>
<li class="toctree-l1"><a class="reference internal" href="running.html">Quickstart: Running the benchmark</a></li>
<li class="toctree-l1"><a class="reference internal" href="usage.html">Quickstart: Implementing your own experiment</a></li>
<li class="toctree-l1 current has-children"><a class="reference internal" href="documentation.html">API reference</a><input checked="" class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" role="switch" type="checkbox"/><label for="toctree-checkbox-1"><div class="visually-hidden">Toggle navigation of API reference</div><i class="icon"><svg><use href="#svg-arrow-right"></use></svg></i></label><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="Model.html">Model</a></li>
<li class="toctree-l2 current current-page"><a class="current reference internal" href="#">Data</a></li>
Expand Down Expand Up @@ -274,8 +274,12 @@ <h1>Data<a class="headerlink" href="#data" title="Permalink to this heading">¶<
<dd class="field-even"><p><strong>FileNotFoundError</strong> – If file is not found in default search path and path is not specified.</p>
</dd>
</dl>
<p>Example: Data(‘c4_demo’, data_type = ‘sft’, data_path = ‘./libs/llama_factory/data/c4_demo.json’)</p>
<p>Example: Data(‘c4_demo’, data_type = ‘sft’)</p>
<p>Examples:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>Data('c4_demo', data_type = 'sft', data_path = './libs/llama_factory/data/c4_demo.json')
Data('c4_demo', data_type = 'sft')
</pre></div>
</div>
</dd></dl>

<dl class="py method">
Expand Down Expand Up @@ -314,9 +318,9 @@ <h1>Data<a class="headerlink" href="#data" title="Permalink to this heading">¶<
<dl class="py method">
<dt class="sig sig-object py" id="src.abstractions.data.Data.save_permanent_and_register">
<span class="sig-name descname"><span class="pre">save_permanent_and_register</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">saved_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">forced_rewrite</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#src.abstractions.data.Data.save_permanent_and_register" title="Permalink to this definition"></a></dt>
<dd><p>Data will be saved to data_save_path from abstractions_config.json.
Without save_permanent, it will still be present in ./output/ and can still be directly used next time without specifying the full path.
Do not include path and suffix in saved_name.</p>
<dd><p>Data will be saved to <code class="code docutils literal notranslate"><span class="pre">data_save_path</span></code> from <code class="code docutils literal notranslate"><span class="pre">abstractions_config.json</span></code>.
Without save_permanent, it will still be present in <code class="code docutils literal notranslate"><span class="pre">./output/</span></code> and can still be directly used next time without specifying the full path.
Do not include path and suffix in the <code class="code docutils literal notranslate"><span class="pre">saved_name</span></code> argument.</p>
</dd></dl>

<dl class="py method">
Expand All @@ -336,9 +340,12 @@ <h1>Data<a class="headerlink" href="#data" title="Permalink to this heading">¶<
</ul>
</dd>
</dl>
<p>Examples:</p>
<p>(for pretraining dataset stored in content field) data.set_key_fields(prompt_field_name=’content’)</p>
<p>(for QA dataset with system prompt) data.set_key_fields(prompt_field_name=’instruction’, query_field_name=’input’, response_field_name=’output’)</p>
<p>Examples:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre>data.set_key_fields(prompt_field_name='content') # for pretraining dataset stored in content field
data.set_key_fields(prompt_field_name='instruction', query_field_name='input', response_field_name='output') # for QA dataset with system prompt
</pre></div>
</div>
</dd></dl>

<dl class="py method">
Expand Down Expand Up @@ -390,12 +397,16 @@ <h1>Data<a class="headerlink" href="#data" title="Permalink to this heading">¶<
<p>If collection_path is omitted, we will search for collection_name in directories specified in abstractions_config.json.
When file_selection_func is supplied, files will be captured in real time, rather than only at initialization. Only JSON files will be captured.
You may want to exclude undated.json using file_selection_func. That file is huge.</p>
<dl class="simple">
<dt>Example: DataFileCollection(collection_name=’histext_1826_to_2018’,</dt><dd><p>data_type=’pretrain’,
collection_path = ‘../../shared_storage/our_datasets/HisText_Mar8_Guten_EEBO_PoL_IA10_unrefined/’,
<p>Example:
.. code-block:: python</p>
<blockquote>
<div><dl class="simple">
<dt>DataFileCollection(collection_name=’histtext_1826_to_2018’,</dt><dd><p>data_type=’pretrain’,
collection_path = ‘./dataset/dataset_text_sequence/’,
file_selection_func = (lambda path: 1826 &lt;= int(path.split(‘/’)[-1][1:6]) &lt;= 2018))</p>
</dd>
</dl>
</div></blockquote>
</dd></dl>

<dl class="py method">
Expand Down Expand Up @@ -445,9 +456,9 @@ <h1>Data<a class="headerlink" href="#data" title="Permalink to this heading">¶<
<dl class="py method">
<dt class="sig sig-object py" id="src.abstractions.data.DataFileCollection.save_permanent">
<span class="sig-name descname"><span class="pre">save_permanent</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">saved_name</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">forced_rewrite</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">bool</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#src.abstractions.data.DataFileCollection.save_permanent" title="Permalink to this definition"></a></dt>
<dd><p>DataFileCollection will be saved to data_save_path from abstractions_config.json.
Without save_permanent, it will still be present in ./output/ and can still be directly used next time without specifying the full path.
Normally, you should not include full path and/or suffix in saved_name. If you do, it will be seen as a path. In this case, the collection may not be autodiscovered by abstractions for future use.</p>
<dd><p>DataFileCollection will be saved to <code class="code docutils literal notranslate"><span class="pre">data_save_path</span></code> from <code class="code docutils literal notranslate"><span class="pre">abstractions_config.json</span></code>.
Without save_permanent, it will still be present in <code class="code docutils literal notranslate"><span class="pre">./output/</span></code> and can still be directly used next time without specifying the full path.
Normally, you should not include full path and/or suffix in <code class="code docutils literal notranslate"><span class="pre">saved_name</span></code>. If you do, it will be seen as a path. In this case, the collection may not be autodiscovered by abstractions for future use.</p>
</dd></dl>

<dl class="py method">
Expand Down
Loading

0 comments on commit e1f7a8b

Please sign in to comment.