feat: proper CLI cmd setup

This commit is contained in:
Aaron Yarborough 2024-12-18 19:12:08 +00:00
parent 1b816606b9
commit 7d923544fb
15 changed files with 236 additions and 317262 deletions

View file

@ -14,8 +14,8 @@ The list of APIs was gathered from here: https://openreferraluk.org/dashboard
1. Clone the repo
2. Run `npm install` to install node dependencies
3. Run `npm run fetch` to fetch up to date data
4. Run `npm run eval ./data/Bristol\ Council.json` to run the evaluation tool over the pulled data (swapping the path to whatever LA data you want to evaluate)
3. Run `node bin/fetch.js <url>` to fetch and save a JSON file containing an array of services data.
4. Run `node bin/eval.js <path>` to run the evaluation tool over a JSON file generated from step 3.
Results will be shown in the console:
@ -32,7 +32,13 @@ Results will be shown in the console:
# of services with a valid contact: 598/819 (73%)
```
### Updating the list of APIs
**Note:** use the `--help` CLI flag to see more options for each command.
An Insomnium JSON file (`./insomnium.json`) is used to store the list of endpoints the tool fetches data from. To edit the list using a UI, download and install the [Insomnium](https://github.com/ArchGPT/insomnium) REST API client and open the JSON file.
### Example
To fetch and evaluate the quality of data from Southampton's OR UK API (https://directory.southampton.gov.uk/api):
```
$ node bin/fetch.js https://directory.southampton.gov.uk/api/services
$ node bin/eval.js ./data/directorysouthamptongovuk.json
```

109
bin/eval.js Normal file
View file

@ -0,0 +1,109 @@
const fs = require('fs');
const { hasValidId, hasValidStatus, hasValidName, hasValidDescription, hasValidOrganisation, hasValidContact, hasValidUrl } = require('../validators');
const { program } = require("commander");
// Command-line interface: one required positional argument (the data file path)
// and an opt-in flag that prints the per-service validation table.
program.argument("<path>", "Path to the JSON data file.");
program.option(
  "-s, --show-services",
  "Shows detailed validation information for each service in a table.",
  false
);
program.parse();
const options = program.opts();
// Entry point: loads the services file named on the command line, validates
// every service and prints per-check and overall totals.
(async () => {
  // Read the path from commander's parsed positionals. process.argv[2] is only
  // the path when no option precedes it; `eval.js -s data.json` would yield "-s".
  const [path] = program.args;
  if (!path) {
    console.error("Path not provided.");
    process.exit(1);
  }

  // Load services array from the given JSON file
  const services = await loadData(path);
  if (!Array.isArray(services)) {
    console.error("Expected the JSON data file to contain an array of services.");
    process.exit(1);
  }

  // Iterate over and validate all services
  const results = services.map((service) => ({
    id: service.id,
    hasValidID: hasValidId(service),
    hasValidStatus: hasValidStatus(service),
    hasValidName: hasValidName(service),
    hasValidDescription: hasValidDescription(service),
    hasValidURL: hasValidUrl(service),
    hasValidOrganisation: hasValidOrganisation(service),
    hasValidContact: hasValidContact(service),
  }));

  if (options.showServices) {
    console.table(results);
  }

  // Formats "<text>: <valid>/<total> (<pct>%)". An empty data set reports 0%
  // rather than the NaN% that a division by zero would produce.
  const validationResultMessage = (text, validTotal) => {
    const percentage =
      results.length === 0 ? 0 : Math.round((validTotal / results.length) * 100);
    return `${text}: ${validTotal}/${results.length} (${percentage}%)`;
  };

  // Report label paired with the result property it counts, in print order.
  const checks = [
    ["# of services with a valid ID", "hasValidID"],
    ["# of services with a valid status", "hasValidStatus"],
    ["# of services with a valid name", "hasValidName"],
    ["# of services with a valid description", "hasValidDescription"],
    ["# of services with a valid URL", "hasValidURL"],
    ["# of services with a valid organisation", "hasValidOrganisation"],
    ["# of services with a valid contact", "hasValidContact"],
  ];

  console.log("--------------------");
  console.log("RESULTS");
  console.log("--------------------");
  for (const [label, key] of checks) {
    console.log(validationResultMessage(label, results.filter((x) => x[key]).length));
  }
  console.log("--------------------");
  // A service is "usable" only when it passes every individual check.
  console.log(validationResultMessage(
    "# of usable services",
    results.filter((x) => checks.every(([, key]) => x[key])).length
  ));
  console.log("--------------------");
})().catch((error) => {
  // Surface load/validation failures (including the wrapped cause) and exit non-zero.
  console.error(error);
  process.exit(1);
});
/**
 * Reads the file at `path` as UTF-8 text and parses it as JSON.
 *
 * @param {string} path - Location of the JSON file to read.
 * @returns {Promise<*>} The parsed JSON value.
 * @throws {Error} "Unable to load JSON data file" when reading or parsing
 *   fails; the underlying error is attached as `cause`.
 */
async function loadData(path) {
  console.log(`Loading JSON data from ${path}...`);

  let parsed;
  try {
    const contents = await fs.promises.readFile(path, "utf-8");
    parsed = JSON.parse(contents);
  } catch (failure) {
    throw new Error("Unable to load JSON data file", { cause: failure });
  }
  return parsed;
}

63
bin/fetch.js Normal file
View file

@ -0,0 +1,63 @@
const { wait, toFriendlyFilename } = require("../helpers");
const fs = require("fs");
const { program } = require("commander");
// Builds an option parser that accepts only integers of at least `min`.
// Anything else is reported through commander as a usage error, which exits.
const parseIntOption = (min) => (value) => {
  const parsed = Number(value);
  if (!Number.isInteger(parsed) || parsed < min) {
    program.error(`error: expected an integer >= ${min} but received '${value}'`);
  }
  return parsed;
};

// Command-line interface. Every value-taking option declares a `<placeholder>`:
// without one commander treats the option as a boolean flag, so `-o out.json`
// would set `outPath` to `true` and leave `out.json` as a stray positional.
// The literal "<url>" in the default out path is substituted by the caller.
program
  .argument("<url>", "Services URL of the OR UK API to fetch data from.")
  .option("-o, --out-path <path>", "Output path to save fetched JSON data to.", "./data/<url>.json")
  .option(
    "-w, --wait-between-req-ms <ms>",
    "The number of milliseconds to wait between API requests.",
    parseIntOption(0),
    500
  )
  .option(
    "-p, --per-page <count>",
    "The number of items to return in each response.",
    parseIntOption(1),
    500
  );
program.parse();
const options = program.opts();
// Entry point: fetches every service from the given API URL and saves the
// combined array as pretty-printed JSON.
(async () => {
  // Local require: only this block needs `path`, to derive the output directory.
  const nodePath = require("path");

  const [url] = program.args;

  // Fail with a readable message instead of an unhandled TypeError on a bad URL.
  let host;
  try {
    ({ host } = new URL(url));
  } catch (error) {
    console.error(`Invalid URL provided: ${url}`);
    process.exit(1);
  }

  // The default out path carries a "<url>" placeholder that becomes the
  // filename-safe form of the host.
  const outPath = options.outPath.replace("<url>", toFriendlyFilename(host));

  // Get services
  console.log(`Fetching services from ${url}...`);
  const services = await fetchServices(url, options.perPage, options.waitBetweenReqMs);

  // Write to file, creating the target directory first so a fresh checkout
  // (with no ./data directory) does not fail with ENOENT.
  await fs.promises.mkdir(nodePath.dirname(outPath), { recursive: true });
  await fs.promises.writeFile(outPath, JSON.stringify(services, null, 2));

  console.log(`Wrote ${services.length} services to ${outPath}.`);
})().catch((error) => {
  console.error(error);
  process.exit(1);
});
/**
 * Fetches every page of services from a paginated endpoint and returns them
 * as one array.
 *
 * Pages are requested with `page` (starting at 1) and `per_page` query
 * parameters until a response reports `last === true`.
 *
 * @param {string} ep - Absolute URL of the services endpoint.
 * @param {number} perPage - Value sent as the `per_page` query parameter.
 * @param {number} waitMs - Delay passed to `wait` between page requests.
 * @returns {Promise<Array>} All fetched services, or an empty array when any
 *   page request returns a non-OK response.
 */
async function fetchServices(ep, perPage, waitMs) {
  const services = [];

  for (let page = 1; ; page++) {
    // Build the URL through searchParams so an endpoint that already carries a
    // query string stays valid (string concatenation would add a second "?").
    const pagedEp = new URL(ep);
    pagedEp.searchParams.set("page", page);
    pagedEp.searchParams.set("per_page", perPage);

    const resp = await fetch(pagedEp.href);
    if (!resp.ok) {
      console.warn(
        `Could not fetch services for ${ep}. Skipping. (Resp code ${resp.status})`
      );
      return [];
    }

    const json = await resp.json();
    const content = Array.isArray(json?.content) ? json.content : [];
    // Append in place; re-spreading the accumulator each page copies it every time.
    for (const service of content) {
      services.push(service);
    }

    if (json?.last === true) {
      console.log("All services found!");
      break;
    }

    // An empty page that is not flagged as last would otherwise loop forever.
    if (content.length === 0) {
      console.warn(`Received an empty page (${page}) from ${ep} before the last page. Stopping.`);
      break;
    }

    console.log(`Loaded ${services.length} of total ${json.totalElements}...`);
    await wait(waitMs);
  }

  return services;
}

File diff suppressed because one or more lines are too long

View file

@ -1 +0,0 @@
[]

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

File diff suppressed because one or more lines are too long

View file

@ -2,6 +2,15 @@ function wait(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
/**
 * Converts arbitrary text into a lowercase token usable as a file name.
 *
 * @param {string} str - Text to convert (for example a URL host).
 * @returns {string} `str` trimmed and lowercased, with every character other
 *   than a-z, 0-9 and whitespace removed, and each whitespace run replaced by
 *   a single underscore.
 */
function toFriendlyFilename(str) {
  const normalised = str.trim().toLowerCase();
  // Drop punctuation first so that only letters, digits and whitespace remain.
  const lettersDigitsAndSpaces = normalised.replace(/[^a-z0-9\s]/g, '');
  return lettersDigitsAndSpaces.replace(/\s+/g, '_');
}
module.exports = {
wait
wait,
toFriendlyFilename
};

View file

@ -1,318 +0,0 @@
{
"_type": "export",
"__export_format": 4,
"__export_date": "2024-12-13T14:50:44.419Z",
"__export_source": "insomnia.desktop.app:v0.2.2",
"resources": [
{
"_id": "req_5a17e696d356437d8b08f73db7d53d9f",
"parentId": "fld_7249563264484823be781bbc7ce7d972",
"modified": 1734101410939,
"created": 1734101404967,
"url": "https://directory.southampton.gov.uk/api/services",
"name": "/services",
"description": "",
"method": "GET",
"body": {},
"parameters": [],
"headers": [
{
"name": "User-Agent",
"value": "insomnia/0.2.2"
}
],
"authentication": {},
"metaSortKey": -1734101404967,
"isPrivate": false,
"settingStoreCookies": true,
"settingSendCookies": true,
"settingDisableRenderRequestBody": false,
"settingEncodeUrl": true,
"settingRebuildPath": true,
"settingFollowRedirects": "global",
"segmentParams": [],
"_type": "request"
},
{
"_id": "fld_7249563264484823be781bbc7ce7d972",
"parentId": "wrk_b6699c43c9a1492787836ea5019c5982",
"modified": 1734101402543,
"created": 1734101402543,
"name": "Southampton Council",
"description": "",
"environment": {},
"environmentPropertyOrder": null,
"metaSortKey": -1734101402543,
"_type": "request_group"
},
{
"_id": "wrk_b6699c43c9a1492787836ea5019c5982",
"parentId": null,
"modified": 1734101147095,
"created": 1734101147095,
"name": "ORUK LAs",
"description": "",
"scope": "collection",
"_type": "workspace"
},
{
"_id": "req_e2cc674b1e1b41bd8e4b95b78c65c7f8",
"parentId": "fld_12fd3f34f0ea4844bba2909a227cdd1a",
"modified": 1734101381629,
"created": 1734101358188,
"url": "https://penninelancs.openplace.directory/o/ServiceDirectoryService/v2/services",
"name": "/services",
"description": "",
"method": "GET",
"body": {},
"parameters": [],
"headers": [
{
"name": "User-Agent",
"value": "insomnia/0.2.2"
}
],
"authentication": {},
"metaSortKey": -1734101358188,
"isPrivate": false,
"settingStoreCookies": true,
"settingSendCookies": true,
"settingDisableRenderRequestBody": false,
"settingEncodeUrl": true,
"settingRebuildPath": true,
"settingFollowRedirects": "global",
"segmentParams": [],
"_type": "request"
},
{
"_id": "fld_12fd3f34f0ea4844bba2909a227cdd1a",
"parentId": "wrk_b6699c43c9a1492787836ea5019c5982",
"modified": 1734101355947,
"created": 1734101355947,
"name": "Pennine Lancashire ICP",
"description": "",
"environment": {},
"environmentPropertyOrder": null,
"metaSortKey": -1734101355947,
"_type": "request_group"
},
{
"_id": "req_dc6bb46f106f43e79ce5e0d7dc9051a5",
"parentId": "fld_0db8e7de61bc4d11a07117caea843b44",
"modified": 1734101326671,
"created": 1734101319555,
"url": "https://northlincs.openplace.directory/o/ServiceDirectoryService/v2/services",
"name": "/services",
"description": "",
"method": "GET",
"body": {},
"parameters": [],
"headers": [
{
"name": "User-Agent",
"value": "insomnia/0.2.2"
}
],
"authentication": {},
"metaSortKey": -1734101319555,
"isPrivate": false,
"settingStoreCookies": true,
"settingSendCookies": true,
"settingDisableRenderRequestBody": false,
"settingEncodeUrl": true,
"settingRebuildPath": true,
"settingFollowRedirects": "global",
"segmentParams": [],
"_type": "request"
},
{
"_id": "fld_0db8e7de61bc4d11a07117caea843b44",
"parentId": "wrk_b6699c43c9a1492787836ea5019c5982",
"modified": 1734101317039,
"created": 1734101317039,
"name": "North Lincolnshire Council",
"description": "",
"environment": {},
"environmentPropertyOrder": null,
"metaSortKey": -1734101317039,
"_type": "request_group"
},
{
"_id": "req_35869d2eaffc4819858002ce7d5131f6",
"parentId": "fld_7db16d60402144d2ba30852b58a5bd82",
"modified": 1734101288057,
"created": 1734101280037,
"url": "https://api.familyinfo.buckinghamshire.gov.uk/api/v1/services",
"name": "/services",
"description": "",
"method": "GET",
"body": {},
"parameters": [],
"headers": [
{
"name": "User-Agent",
"value": "insomnia/0.2.2"
}
],
"authentication": {},
"metaSortKey": -1734101284292,
"isPrivate": false,
"settingStoreCookies": true,
"settingSendCookies": true,
"settingDisableRenderRequestBody": false,
"settingEncodeUrl": true,
"settingRebuildPath": true,
"settingFollowRedirects": "global",
"segmentParams": [],
"_type": "request"
},
{
"_id": "fld_7db16d60402144d2ba30852b58a5bd82",
"parentId": "wrk_b6699c43c9a1492787836ea5019c5982",
"modified": 1734101277775,
"created": 1734101277775,
"name": "Buckinghamshire Council",
"description": "",
"environment": {},
"environmentPropertyOrder": null,
"metaSortKey": -1734101277775,
"_type": "request_group"
},
{
"_id": "req_c2e99c36c4ec4deb9c283e6a47076f3d",
"parentId": "fld_ab7b6940dd0c44049d4e41af5c68637c",
"modified": 1734101205414,
"created": 1734101149071,
"url": "https://bristol.openplace.directory/o/ServiceDirectoryService/v2/hservices",
"name": "/services",
"description": "",
"method": "GET",
"body": {},
"parameters": [],
"headers": [
{
"name": "User-Agent",
"value": "insomnia/0.2.2"
}
],
"authentication": {},
"metaSortKey": -1734101205391,
"isPrivate": false,
"settingStoreCookies": true,
"settingSendCookies": true,
"settingDisableRenderRequestBody": false,
"settingEncodeUrl": true,
"settingRebuildPath": true,
"settingFollowRedirects": "global",
"segmentParams": [],
"_type": "request"
},
{
"_id": "fld_ab7b6940dd0c44049d4e41af5c68637c",
"parentId": "wrk_b6699c43c9a1492787836ea5019c5982",
"modified": 1734101191560,
"created": 1734101191560,
"name": "Bristol Council",
"description": "",
"environment": {},
"environmentPropertyOrder": null,
"metaSortKey": -1734101191560,
"_type": "request_group"
},
{
"_id": "env_de6c835521010ba26be4220f11dc5a6f5069ddf3",
"parentId": "wrk_b6699c43c9a1492787836ea5019c5982",
"modified": 1734101147098,
"created": 1734101147098,
"name": "Base Environment",
"data": {},
"dataPropertyOrder": null,
"color": null,
"isPrivate": false,
"metaSortKey": 1734101147098,
"_type": "environment"
},
{
"_id": "jar_de6c835521010ba26be4220f11dc5a6f5069ddf3",
"parentId": "wrk_b6699c43c9a1492787836ea5019c5982",
"modified": 1734101376960,
"created": 1734101147098,
"name": "Default Jar",
"cookies": [
{
"key": "JSESSIONID",
"value": "1B079D52EEAE9DBC6783C96D71EDF399",
"domain": "bristol.openplace.directory",
"path": "/",
"secure": true,
"httpOnly": true,
"hostOnly": true,
"creation": "2024-12-13T14:46:10.292Z",
"lastAccessed": "2024-12-13T14:46:10.292Z",
"id": "4630461386775335"
},
{
"key": "AWSELB",
"value": "2D870D5F0E8860F2EF95DB74F9FDC976417AF8CE68BB383FC3865E321F4838F1CD6B5C9AF23B387FD2E2AAA3FA0C38F5BEF0D9172D5C9046BC720034DB7DD4D5154F9B072EE2AA12398CEDCEC6C4A209C1F79F782A",
"domain": "bristol.openplace.directory",
"path": "/",
"secure": true,
"httpOnly": true,
"hostOnly": true,
"creation": "2024-12-13T14:46:10.294Z",
"lastAccessed": "2024-12-13T14:46:10.294Z",
"id": "12139281671488988"
},
{
"key": "JSESSIONID",
"value": "DF04FB103146B7994E88BCC9E7B72F31",
"domain": "northlincs.openplace.directory",
"path": "/",
"secure": true,
"httpOnly": true,
"hostOnly": true,
"creation": "2024-12-13T14:48:41.552Z",
"lastAccessed": "2024-12-13T14:48:41.552Z",
"id": "1023352684773664"
},
{
"key": "AWSELB",
"value": "2D870D5F0E8860F2EF95DB74F9FDC976417AF8CE6809D3AEDDCA61FB82F1F2060AEEA8F11C6541C0E2BA34496CB5FB94A03D9314795C9046BC720034DB7DD4D5154F9B072EE2AA12398CEDCEC6C4A209C1F79F782A",
"domain": "northlincs.openplace.directory",
"path": "/",
"secure": true,
"httpOnly": true,
"hostOnly": true,
"creation": "2024-12-13T14:48:41.552Z",
"lastAccessed": "2024-12-13T14:48:41.552Z",
"id": "8515615299671728"
},
{
"key": "JSESSIONID",
"value": "0144C070FC1D17CE851A8B6A2C8F5B61",
"domain": "penninelancs.openplace.directory",
"path": "/",
"secure": true,
"httpOnly": true,
"hostOnly": true,
"creation": "2024-12-13T14:49:36.958Z",
"lastAccessed": "2024-12-13T14:49:36.958Z",
"id": "7743775872235581"
},
{
"key": "AWSELB",
"value": "2D870D5F0E8860F2EF95DB74F9FDC976417AF8CE68BFA6CBA6790DEEA2DD2A9AC9FAFF5B7776B09675239D14A769B864D1960639ED5C9046BC720034DB7DD4D5154F9B072EE2AA12398CEDCEC6C4A209C1F79F782A",
"domain": "penninelancs.openplace.directory",
"path": "/",
"secure": true,
"httpOnly": true,
"hostOnly": true,
"creation": "2024-12-13T14:49:36.959Z",
"lastAccessed": "2024-12-13T14:49:36.959Z",
"id": "8352805125006024"
}
],
"_type": "cookie_jar"
}
]
}

23
package-lock.json generated
View file

@ -1,25 +1,20 @@
{
"name": "fh-or-tester",
"name": "or-uk-data-quality-checker",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "fh-or-tester",
"name": "or-uk-data-quality-checker",
"dependencies": {
"uuid": "^11.0.3"
"commander": "^12.1.0"
}
},
"node_modules/uuid": {
"version": "11.0.3",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-11.0.3.tgz",
"integrity": "sha512-d0z310fCWv5dJwnX1Y/MncBAqGMKEzlBb1AOf7z9K8ALnd0utBX/msg/fA0+sbyN1ihbMsLhrBlnl1ak7Wa0rg==",
"funding": [
"https://github.com/sponsors/broofa",
"https://github.com/sponsors/ctavan"
],
"license": "MIT",
"bin": {
"uuid": "dist/esm/bin/uuid"
"node_modules/commander": {
"version": "12.1.0",
"resolved": "https://registry.npmjs.org/commander/-/commander-12.1.0.tgz",
"integrity": "sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==",
"engines": {
"node": ">=18"
}
}
}

View file

@ -1,8 +1,9 @@
{
"name": "or-uk-data-quality-checker",
"description": "A tool to fetch and validate the quality of data from Open Referral UK APIs.",
"version": "1.0.0",
"main": "index.js",
"scripts": {
"fetch": "node script-fetch.js",
"eval": "node script-eval.js"
"dependencies": {
"commander": "^12.1.0"
}
}

View file

@ -1,93 +0,0 @@
const fs = require('fs');
// Entry point: validates every service in the JSON file named by the first
// command-line argument and prints per-check and overall totals.
(async () => {
  // argv[0] is the node executable and argv[1] the script, so the user-supplied
  // path is argv[2]. Checking argv[0] could never detect a missing path.
  const path = process.argv[2];
  if (!path) {
    throw new Error("Path not provided.");
  }

  const services = await loadData(path);
  const results = services.map((service) => ({
    id: service.id,
    hasValidID: hasValidID(service),
    hasValidStatus: hasValidStatus(service),
    hasValidName: hasValidName(service),
    hasValidDescription: hasValidDescription(service),
    hasValidURL: hasValidURL(service),
    hasValidOrganisation: hasValidOrganisation(service),
    hasValidContact: hasValidContact(service),
  }));

  // Prints "<label>: <count>/<total> (<pct>%)". An empty data set reports 0%
  // rather than the NaN% that a division by zero would produce.
  const report = (label, count) => {
    const percentage = results.length === 0 ? 0 : Math.round((count / results.length) * 100);
    console.log(`${label}: ${count}/${results.length} (${percentage}%)`);
  };

  // Report label paired with the result property it counts, in print order.
  const checks = [
    ["# of services with a valid ID", "hasValidID"],
    ["# of services with a valid status", "hasValidStatus"],
    ["# of services with a valid name", "hasValidName"],
    ["# of services with a valid description", "hasValidDescription"],
    ["# of services with a valid URL", "hasValidURL"],
    ["# of services with a valid organisation", "hasValidOrganisation"],
    ["# of services with a valid contact", "hasValidContact"],
  ];

  for (const [label, key] of checks) {
    report(label, results.filter((x) => x[key]).length);
  }

  console.log("--------------------");

  // A service is usable only when it passes every individual check.
  report(
    "# of valid usable services",
    results.filter((x) => checks.every(([, key]) => x[key])).length
  );
})();
/**
 * Reads the file at `path` as UTF-8 text and parses it as JSON.
 *
 * @param {string} path - Location of the JSON file to read.
 * @returns {Promise<*>} The parsed JSON value.
 */
async function loadData(path) {
  const contents = await fs.promises.readFile(path, "utf-8");
  const parsed = JSON.parse(contents);
  return parsed;
}
// Validators
// Each validator receives one service object and returns a boolean.

// True when `id` is a string in 8-4-4-4-12 hexadecimal form (surrounding
// whitespace ignored). A missing or non-string id is invalid rather than a crash.
const hasValidID = (s) =>
  typeof s.id === "string" &&
  /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(s.id.trim());

// True when `name` is a non-empty string.
const hasValidName = (s) => typeof s.name === "string" && s.name.length > 0;

// True when `description` is a non-empty string.
const hasValidDescription = (s) => typeof s.description === "string" && s.description.length > 0;

// True when `url` can be parsed by the WHATWG URL constructor.
const hasValidURL = (s) => {
  try {
    new URL(s.url);
    return true;
  } catch (_) {
    return false;
  }
};

// True when the service has an organization with a truthy id and name.
const hasValidOrganisation = (s) => !!s.organization?.id && !!s.organization?.name;

// True when `email` has a truthy length. The previous `!!len > 0` parsed as
// `(!!len) > 0`; this states the same result explicitly.
const hasValidContact = (s) => Boolean(s.email?.length);

// True only for services whose status is exactly "active".
const hasValidStatus = (s) => s.status === "active";

View file

@ -1,68 +0,0 @@
const { wait } = require("./helpers");
const insomnium = require("./insomnium.json");
const fs = require("fs");
// Delay between consecutive page requests, in milliseconds.
const WAIT_BETWEEN_REQS_MS = 500;
// Value sent as the `per_page` query parameter.
const PER_PAGE = 500;

// Request groups from the Insomnium export; each group's name labels an endpoint.
const groups = insomnium.resources
  .filter((res) => res._type === "request_group");

// Index groups by id once instead of scanning the list for every request.
// The first group seen for an id wins, matching what a linear `find` returns.
const groupsById = new Map();
for (const group of groups) {
  if (!groupsById.has(group._id)) {
    groupsById.set(group._id, group);
  }
}

// Every "/services" request paired with its parent group. Requests whose parent
// group is missing are dropped with a warning, since the group name is needed
// to label and save the fetched data.
const las = insomnium.resources
  .filter((res) => res._type === "request" && res.name === "/services")
  .map((res) => ({ group: groupsById.get(res.parentId), res }))
  .filter(({ group, res }) => {
    if (!group) {
      console.warn(`No request group found for ${res.url}. Skipping.`);
      return false;
    }
    return true;
  });
// Entry point: fetches the services of every configured endpoint in turn and
// writes each result to its own JSON file.
(async () => {
  console.log('Fetching LAs...');

  for (const { group, res } of las) {
    // The group name labels the log output and the output file; skip entries
    // that have none instead of crashing on `undefined.name`.
    if (!group) {
      console.warn(`No request group found for ${res.url}. Skipping.`);
      continue;
    }

    const laName = group.name;
    console.log(`Fetching services for ${laName}...`);

    const services = await fetchServices(res.url);
    console.log(`Found ${services.length} services for ${laName}!`);

    await writeServices(laName, services);
  }
})().catch((error) => {
  // Report the failure explicitly and signal it through the exit code.
  console.error(error);
  process.exitCode = 1;
});
/**
 * Fetches every page of services from a paginated endpoint and returns them
 * as one array.
 *
 * Pages are requested with `page` (starting at 1) and `per_page` (PER_PAGE)
 * query parameters until a response reports `last === true`, waiting
 * WAIT_BETWEEN_REQS_MS between requests.
 *
 * @param {string} ep - Absolute URL of the services endpoint.
 * @returns {Promise<Array>} All fetched services, or an empty array when any
 *   page request returns a non-OK response.
 */
async function fetchServices(ep) {
  const services = [];

  for (let page = 1; ; page++) {
    // Build the URL through searchParams so an endpoint that already carries a
    // query string stays valid (string concatenation would add a second "?").
    const pagedUrl = new URL(ep);
    pagedUrl.searchParams.set("page", page);
    pagedUrl.searchParams.set("per_page", PER_PAGE);
    const pagedEp = pagedUrl.href;

    const resp = await fetch(pagedEp);
    console.log(pagedEp);

    if (!resp.ok) {
      console.warn(`Could not fetch services for ${ep}. Skipping. (Resp code ${resp.status})`);
      return [];
    }

    const json = await resp.json();
    const content = Array.isArray(json?.content) ? json.content : [];
    // Append in place; re-spreading the accumulator each page copies it every time.
    for (const service of content) {
      services.push(service);
    }

    if (json?.last === true) {
      console.log("All services found!");
      break;
    }

    // An empty page that is not flagged as last would otherwise loop forever.
    if (content.length === 0) {
      console.warn(`Received an empty page (${page}) from ${ep} before the last page. Stopping.`);
      break;
    }

    console.log(`Loaded ${services.length} of total ${json.totalElements}...`);
    await wait(WAIT_BETWEEN_REQS_MS);
  }

  return services;
}
/**
 * Saves `services` as pretty-printed JSON (2-space indent) to `data/<name>.json`.
 *
 * @param {string} name - Base name of the output file, without extension.
 * @param {*} services - Value to serialise.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function writeServices(name, services) {
  const destination = `data/${name}.json`;
  const payload = JSON.stringify(services, null, 2);
  await fs.promises.writeFile(destination, payload);
}

31
validators.js Normal file
View file

@ -0,0 +1,31 @@
// True when the service's `id` is a string in 8-4-4-4-12 hexadecimal form
// (surrounding whitespace ignored). A missing or non-string id is reported as
// invalid instead of throwing, so one malformed record cannot abort a whole run.
const hasValidId = (s) =>
  typeof s.id === "string" &&
  /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(s.id.trim());
// A service name is valid when it is a string containing at least one character.
const hasValidName = ({ name }) => typeof name === "string" && name !== "";
// A service description is valid when it is a string containing at least one character.
const hasValidDescription = ({ description }) =>
  typeof description === "string" && description !== "";
// A service URL is valid when the WHATWG URL constructor can parse `s.url`.
// Any failure while reading or parsing the value counts as invalid.
const hasValidUrl = (s) => {
  let parses = true;
  try {
    new URL(s.url);
  } catch {
    parses = false;
  }
  return parses;
};
// A service has a valid organisation when its `organization` carries both a
// truthy `id` and a truthy `name`.
const hasValidOrganisation = (s) => {
  const org = s.organization;
  return Boolean(org?.id) && Boolean(org?.name);
};
// A service has a valid contact when `email` is present with a truthy length.
// The previous `!!s.email?.length > 0` parsed as `(!!length) > 0`, a boolean
// compared with a number; this states the same result without the coercion.
const hasValidContact = (s) => Boolean(s.email?.length);
// A service status is valid only when it is exactly the string "active".
const hasValidStatus = ({ status }) => status === "active";
// Public API of this module: the per-field service validators.
const validators = {
  hasValidId,
  hasValidName,
  hasValidDescription,
  hasValidUrl,
  hasValidOrganisation,
  hasValidContact,
  hasValidStatus,
};

module.exports = validators;