Skip to content

Commit

Permalink
Update category parser to handle quoted headers.
Browse files (browse the repository at this point in the history)
  • Loading branch information
sammacbeth committed Jan 26, 2024
1 parent 56d7d50 commit 4a1fe7d
Showing 1 changed file with 16 additions and 20 deletions.
36 changes: 16 additions & 20 deletions src/trackers/helpers/getCategory.js
Original file line number | Diff line number | Diff line change
@@ -1,32 +1,28 @@
const fs = require('fs')
const _ = require('underscore')
const parse = require('csv-parse/lib/sync')

function getCategories (categoryCSVfilePath) {
const categoryCSV = fs.readFileSync(categoryCSVfilePath, 'utf8').split('\n')
const categoryHeader = categoryCSV.shift()
.replace(/\r/gi, "")
.split(',').slice(1)
const records = parse(fs.readFileSync(categoryCSVfilePath, 'utf8'), {
columns: true,
delimiter: ',',
})
console.log(records)

const domainToCategory = categoryCSV.reduce((obj, row) => {

row = parse(row)[0]
if (!row) {return obj}

const domain = row[0]

// clean up category values. 1 means this is in the category, anything else no idea so skip it
const rowArray = Array.from(row.slice(1)).map(c => {
const domainToCategory = records.reduce((obj, row) => {
const domain = row.domain
obj[domain] = row
delete row.domain
Object.keys(row).forEach(category => {
const c = row[category]
if (c === '1') {
return 1
row[category] = 1
} else if (c === '0' || c === '') {
return 0
row[category] = 0
} else {
console.log(`unknown category value for ${domain}, ${c}`)
row[category] = null
}
console.log(`unknown category value for ${domain}, ${c}`)
return null
})

obj[domain] = _.object(categoryHeader, rowArray)
return obj
}, {})
return domainToCategory
Expand Down

0 comments on commit 4a1fe7d

Please sign in to comment.