Index and persist a subset of postcode data

This commit is contained in:
Alex Wright 2022-10-09 18:43:20 +01:00
parent efdce13757
commit 4c24f74d6e
2 changed files with 72 additions and 4 deletions

View File

@ -9,3 +9,4 @@ edition = "2021"
csv = "1.1.6" csv = "1.1.6"
serde = { version = "1.0.145", features = ["derive"] } serde = { version = "1.0.145", features = ["derive"] }
serde_repr = "0.1.9" serde_repr = "0.1.9"
extindex = "0.5.0"

View File

@ -1,7 +1,8 @@
use csv::Reader;
use csv::ReaderBuilder; use csv::ReaderBuilder;
use extindex::{Builder, Entry, SerdeWrapper, Reader as ExtReader};
use std::error::Error; use std::error::Error;
use std::io; use std::io;
use std::path::Path;
use std::process; use std::process;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde; use serde;
@ -21,7 +22,7 @@ enum UserType {
Large, Large,
} }
#[derive(Serialize_repr, Deserialize_repr, PartialEq, Debug)] #[derive(Serialize_repr, Deserialize_repr, PartialEq, Debug, Clone)]
#[repr(u8)] #[repr(u8)]
enum PositionalQuality { enum PositionalQuality {
MatchedAddressPostcodeMean = 1, MatchedAddressPostcodeMean = 1,
@ -43,9 +44,9 @@ enum PositionalQuality {
// 9 No grid reference available. // 9 No grid reference available.
} }
#[derive(Debug, Deserialize)] #[derive(Debug, Deserialize, Serialize)]
struct Postcode { struct Postcode {
postcode: String, pub postcode: String,
status: Status, status: Status,
usertype: UserType, usertype: UserType,
#[serde(deserialize_with = "csv::invalid_option")] #[serde(deserialize_with = "csv::invalid_option")]
@ -66,6 +67,31 @@ struct Postcode {
incode: String, incode: String,
} }
/// Reduced postcode record: the subset of `Postcode` fields that `build`
/// serializes into the on-disk index. Populated by `SmolPostcode::from_postcode`.
///
/// NOTE(review): the field order presumably determines the serialized layout
/// of existing index files — confirm before reordering.
#[derive(Serialize, Deserialize, Debug)]
struct SmolPostcode {
/// The postcode string; `build` also uses a copy of it as the index key.
postcode: String,
/// Carried over unchanged from `Postcode::status`.
status: Status,
/// Carried over unchanged from `Postcode::usertype`.
usertype: UserType,
/// Carried over from `Postcode::positional_quality_indicator`.
positional_quality_indicator: PositionalQuality,
/// Carried over from `Postcode::country`.
country: String,
/// Optional latitude, kept as a string (it is not parsed into a number here).
latitude: Option<String>,
/// Optional longitude, kept as a string (it is not parsed into a number here).
longitude: Option<String>,
}
impl SmolPostcode {
fn from_postcode(full: Postcode) -> Self {
SmolPostcode {
postcode: full.postcode.to_owned(),
status: full.status,
usertype: full.usertype,
positional_quality_indicator: full.positional_quality_indicator.to_owned(),
country: full.country.to_owned(),
latitude: full.latitude.to_owned(),
longitude: full.longitude.to_owned(),
}
}
}
fn read() -> Result<(), Box<dyn Error>> { fn read() -> Result<(), Box<dyn Error>> {
let mut rdr = ReaderBuilder::new() let mut rdr = ReaderBuilder::new()
.has_headers(false) .has_headers(false)
@ -80,9 +106,50 @@ fn read() -> Result<(), Box<dyn Error>> {
Ok(()) Ok(())
} }
/*
* Thought this would be easy..
*
let postcodes = csv_iter(io::stdin());
for p in postcodes.take(10) {
println!("Postcode: {}", p);
}
fn csv_iter(file: impl io::Read + 'static) -> impl Iterator<Item=String> {
let mut reader = ReaderBuilder::new()
.has_headers(false)
.from_reader(file);
let iter = reader.deserialize();
iter
.filter_map(|r: Result<Postcode, csv::Error>| r.ok())
.map(|postcode| postcode.postcode.to_owned())
.collect()
}
*/
fn main() { fn main() {
build();
if let Err(err) = read() { if let Err(err) = read() {
println!("error running example: {}", err); println!("error running example: {}", err);
process::exit(1); process::exit(1);
} }
} }
fn build() {
let index_file_path = Path::new("./postcodes.db");
let builder: Builder<String, SerdeWrapper<SmolPostcode>> = Builder::new(index_file_path);
let mut csv_reader = ReaderBuilder::new()
.has_headers(false)
.from_reader(io::stdin());
let entries = csv_reader.deserialize()
.filter_map(|r: Result<Postcode, csv::Error>| r.ok())
.map(|full: Postcode| SmolPostcode::from_postcode(full))
.map(|smol| Entry::new(smol.postcode.to_owned(), SerdeWrapper(smol)));
builder.build(entries.into_iter()).unwrap();
let reader = ExtReader::<String, SerdeWrapper<SmolPostcode>>::open(index_file_path).unwrap();
let here = reader.find(&"LS27 8BW".to_string()).unwrap().expect("Not found");
println!("Here: {:?}", here.value().0);
}