Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix category parsing of quoted headers #51

Merged
merged 2 commits into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 16 additions & 20 deletions src/trackers/helpers/getCategory.js
Original file line number Diff line number Diff line change
@@ -1,32 +1,28 @@
const fs = require('fs')
const _ = require('underscore')
const parse = require('csv-parse/lib/sync')

function getCategories (categoryCSVfilePath) {
const categoryCSV = fs.readFileSync(categoryCSVfilePath, 'utf8').split('\n')
const categoryHeader = categoryCSV.shift()
.replace(/\r/gi, "")
.split(',').slice(1)
const records = parse(fs.readFileSync(categoryCSVfilePath, 'utf8'), {
columns: true,
delimiter: ',',
})
console.log(records)

const domainToCategory = categoryCSV.reduce((obj, row) => {

row = parse(row)[0]
if (!row) {return obj}

const domain = row[0]

// clean up category values. 1 means this is in the category, anything else no idea so skip it
const rowArray = Array.from(row.slice(1)).map(c => {
const domainToCategory = records.reduce((obj, row) => {
const domain = row.domain
obj[domain] = row
delete row.domain
Object.keys(row).forEach(category => {
const c = row[category]
if (c === '1') {
return 1
row[category] = 1
} else if (c === '0' || c === '') {
return 0
row[category] = 0
} else {
console.log(`unknown category value for ${domain}, ${c}`)
row[category] = null
}
console.log(`unknown category value for ${domain}, ${c}`)
return null
})

obj[domain] = _.object(categoryHeader, rowArray)
return obj
}, {})
return domainToCategory
Expand Down
21 changes: 21 additions & 0 deletions test/categories.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
const assert = require('assert')
const {describe, it, before} = require('mocha')
const { getCategories } = require('../src/trackers/helpers/getCategory')

// Exercises getCategories against the CSV fixture and checks the resulting
// domain -> { category: flag } map.
describe('getCategories', () => {
    it('generates a domain/category map', () => {
        // Expected shape: one entry per domain, keyed by category name, with
        // numeric flags. The test expects 0 for example.com's 'First Category'.
        const expectedMap = {
            'ads.com': {
                'First Category': 1,
                'Advertising': 1,
            },
            'example.com': {
                'First Category': 0,
                'Advertising': 1,
            }
        }

        const domainToCategory = getCategories('test/fixtures/categorized_trackers.csv')

        assert.deepStrictEqual(domainToCategory, expectedMap)
    })
})


3 changes: 3 additions & 0 deletions test/fixtures/categorized_trackers.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
domain,"First Category",Advertising
example.com,,1
ads.com,1,1
Loading