saffron/storage.rs

//! This file handles storage management for the state replicator.
//! The data is stored on disk in a file, and this file provides functions to
//! read the whole file (because producing a read proof requires the whole
//! polynomial) and to update dispersed chunks of data.
//!
//! Note: the encoding used for the bytes <-> scalars conversion is the `full`
//! encoding, meaning that field elements are encoded over `F::size_in_bytes()`
//! bytes, which is 32 for Pallas & Vesta.
//! Using the 31-byte version currently leads to inconsistencies when updating
//! if the diff's new values are greater than what is representable over 31
//! bytes.
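//!
//! A minimal usage sketch (not a compiled doctest; it assumes this module is
//! reachable as `saffron::storage`, that `ScalarField` is the crate's scalar
//! field alias, and that `bytes` and `diff` are already in scope):
//!
//! ```ignore
//! use saffron::storage;
//!
//! // Encode raw bytes as scalars, persist them, read them back, then apply
//! // a diff directly on disk.
//! let data = storage::Data::<ScalarField>::of_bytes(&bytes);
//! storage::init("/tmp/state.bin", &data)?;
//! let read_back: storage::Data<ScalarField> = storage::read("/tmp/state.bin")?;
//! storage::update("/tmp/state.bin", &diff)?;
//! ```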

use crate::{commitment::*, diff::Diff, encoding, utils::evals_to_polynomial};
use ark_ff::PrimeField;
use ark_poly::{univariate::DensePolynomial, EvaluationDomain, Radix2EvaluationDomain as R2D};
use kimchi::curve::KimchiCurve;
use poly_commitment::ipa::SRS;
use std::{
    fs::{File, OpenOptions},
    io::{Read, Seek, SeekFrom, Write},
};

use crate::SRS_SIZE;

pub struct Data<F: PrimeField> {
    pub data: Vec<F>,
}

impl<F: PrimeField> Data<F> {
    /// Returns the data corresponding to the provided `bytes`
    pub fn of_bytes(bytes: &[u8]) -> Data<F> {
        Data {
            data: encoding::encode_as_field_elements_full(bytes),
        }
    }

    /// Returns `true` if the data contains no scalar
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Returns the length of the data
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Returns the polynomial that corresponds to the data. If the data is
    /// bigger than the domain's size, the additional points will be ignored.
    /// If the data is smaller, it is padded with zeros.
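    ///
    /// Sketch (not a compiled doctest): with a domain of size 4, calling
    /// `to_polynomial` on `Data { data: vec![a, b] }` interpolates the
    /// evaluations `[a, b, 0, 0]`.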
    pub fn to_polynomial(&self, domain: R2D<F>) -> DensePolynomial<F> {
        use std::iter;
        let n = domain.size();
        let padded_data: Vec<F> = self
            .data
            .iter()
            .cloned()
            .chain(iter::repeat(F::zero()))
            .take(n)
            .collect();
        evals_to_polynomial(padded_data, domain)
    }

    /// Commits to a `data` of length smaller than `SRS_SIZE`.
    /// If longer data is provided, anything above `SRS_SIZE` is ignored.
    pub fn to_commitment<G: KimchiCurve<ScalarField = F>>(&self, srs: &SRS<G>) -> Commitment<G> {
        Commitment::from_data(srs, &self.data)
    }

    /// Modifies the provided data in place by applying `diff`
    pub fn apply_inplace(&mut self, diff: &Diff<F>) {
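        // `Diff::apply_inplace` expects a mutable slice of regions; our data
        // is a single region, so it is wrapped in a one-element slice.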
        let data_slice = std::slice::from_mut(&mut self.data);
        Diff::apply_inplace(data_slice, diff);
    }

    /// Returns a new `Data` corresponding to this one with `diff` applied
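    ///
    /// Sketch (not a compiled doctest; it assumes the `Diff` layout used in
    /// the tests below):
    ///
    /// ```ignore
    /// let diff = Diff { region: 0, addresses: vec![3], diff_values: vec![delta] };
    /// let updated = data.apply(&diff);
    /// // updated.data[3] == data.data[3] + delta
    /// ```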
    pub fn apply(&self, diff: &Diff<F>) -> Data<F> {
        let mut data = Data {
            data: self.data.clone(),
        };
        data.apply_inplace(diff);
        data
    }
}

/// Creates a file at `path` and fills it with `data`
/// TODO: For now, we assume the data vector is smaller than SRS_SIZE
pub fn init<F: PrimeField>(path: &str, data: &Data<F>) -> std::io::Result<()> {
    // TODO: handle the > SRS_SIZE case
    assert!(data.len() <= SRS_SIZE);
    let mut file = File::create(path)?;
    for x in &data.data {
        let x_bytes = encoding::decode_full(*x);
        file.write_all(&x_bytes)?;
    }
    Ok(())
}

/// `read(path)` loads the whole content of the file at `path` and decodes it
/// into field elements.
/// This function raises an error when the path does not exist, or if there is
/// an issue with reading.
pub fn read<F: PrimeField>(path: &str) -> std::io::Result<Data<F>> {
    let mut file = File::open(path)?;
    let mut buffer = Vec::new();
    file.read_to_end(&mut buffer)?;
    // TODO: handle the > SRS_SIZE case (ie Vec<Vec<F>>)
    Ok(Data::of_bytes(&buffer))
}

/// Takes a valid diff and updates the file accordingly, replacing the old
/// values by the new ones at the specified indices; the addresses of the diff
/// are expressed in scalars (not in bytes), and the values of the diff are
/// the scalar differences added to the old values (`new = old + diff`).
/// Note that this only updates the file, not the commitment.
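///
/// For example, with the full encoding over a 32-byte field, the scalar at
/// address `i` of region `r` lives at bytes
/// `(r * SRS_SIZE + i) * 32 .. (r * SRS_SIZE + i + 1) * 32` of the file,
/// which is exactly the offset computed below.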
pub fn update<F: PrimeField>(path: &str, diff: &Diff<F>) -> std::io::Result<()> {
    // Open the file in read mode to get the old value & in write mode to
    // write the new value
    let mut file = OpenOptions::new().read(true).write(true).open(path)?;
    let region_offset = diff.region * (SRS_SIZE as u64);
    let scalar_size = encoding::encoding_size_full::<F>() as u64;
    for (index, diff_value) in diff.addresses.iter().zip(diff.diff_values.iter()) {
        let corresponding_bytes_index = (region_offset + index) * scalar_size;
        file.seek(SeekFrom::Start(corresponding_bytes_index))?;
        let new_value: F = {
            // The old value is taken directly from the file
            let old_value: F = {
                // Save the current cursor position to be able to reset the
                // cursor after the read below
                let pos = file.stream_position()?;
                let mut old_value_bytes = vec![0u8; encoding::encoding_size_full::<F>()];
                file.read_exact(&mut old_value_bytes)?;
                // Go back to the previous position in the file, so the read
                // value will be overwritten by the new one
                file.seek(SeekFrom::Start(pos))?;
                encoding::encode(&old_value_bytes)
            };
            old_value + diff_value
        };
        let new_value_bytes = encoding::decode_full(new_value);
        file.write_all(&new_value_bytes)?;
    }
    Ok(())
}

#[cfg(test)]
mod tests {
    use crate::{
        commitment::Commitment, diff::Diff, encoding, storage, storage::Data, Curve, ScalarField,
        SRS_SIZE,
    };
    use ark_ff::{One, UniformRand, Zero};
    use poly_commitment::ipa::SRS;
    use rand::Rng;
    use std::fs;
    use tempfile::NamedTempFile;

    #[test]
    // Test that the data commitment stays the same after writing and reading
    // back (i.e. the data stays consistent through writing and reading), and
    // that an update is consistently performed in the file
    fn test_data_consistency() {
        let mut rng = o1_utils::tests::make_test_rng(None);

        let srs: SRS<Curve> = poly_commitment::precomputed_srs::get_srs_test();

        // Path of the file that will contain the test data
        let file = NamedTempFile::new().unwrap();
        let path = file.path().to_str().unwrap();

        let data_bytes: Vec<u8> = (0..(SRS_SIZE * (encoding::encoding_size_full::<ScalarField>())))
            .map(|_| rng.gen())
            .collect();
        let mut data = Data::of_bytes(&data_bytes);
        // Setting the first value of the data to zero ensures that, combined
        // with the well-chosen first diff value below, the first updated
        // value overflows 31 bytes
        data.data[0] = ScalarField::zero();
        let data_comm = data.to_commitment(&srs);

        let read_consistency = {
            let _init_storage_file = storage::init(path, &data);
            let read_data = storage::read(path).unwrap();
            let read_data_comm = read_data.to_commitment(&srs);

            // True if the read data is the same as the initial data
            Commitment::eq(&data_comm, &read_data_comm)
        };

        let (data_updated, update_consistency, diff_comm_consistency) = {
            let diff = {
                // The number of updates is proportional to the data length,
                // but we make sure to have at least one update if the data is
                // small
                let nb_updates = std::cmp::max(data.len() / 20, 1);
                let region = 0;
                let addresses: Vec<u64> = (0..nb_updates)
                    .map(|_| (rng.gen_range(0..data.len() as u64)))
                    .collect();
                let mut diff_values: Vec<ScalarField> = addresses
                    .iter()
                    .map(|_| ScalarField::rand(&mut rng))
                    .collect();
                // The first value is replaced by a scalar that would
                // overflow 31 bytes, so the update would be inconsistent and
                // the test would fail if this case were not handled
                diff_values[0] = ScalarField::zero() - ScalarField::one();
                Diff {
                    region,
                    addresses,
                    diff_values,
                }
            };

            let updated_data = data.apply(&diff);
            let updated_data_comm = updated_data.to_commitment(&srs);

            let _file_update = storage::update(path, &diff);

            let updated_read_data = storage::read(path).unwrap();
            let updated_read_data_comm = updated_read_data.to_commitment(&srs);

            let updated_diff_data_comm = data_comm.update(&srs, diff);

            (
                // True if the data has changed because of the update
                Commitment::ne(&updated_data_comm, &data_comm),
                // True if the data read from the updated file is the same as
                // the updated data
                Commitment::eq(&updated_data_comm, &updated_read_data_comm),
                // True if the updated commitment matches the one obtained by
                // applying the diff directly to the commitment
                Commitment::eq(&updated_diff_data_comm, &updated_data_comm),
            )
        };

        let _remove_file = fs::remove_file(path);

        assert!(read_consistency);
        assert!(data_updated);
        assert!(update_consistency);
        assert!(diff_comm_consistency);
    }
}