Skip to content

Commit

Permalink
Expand VitalSource chapter selections to end of HTML document
Browse files Browse the repository at this point in the history
Fix an issue where selections in the VitalSource book picker could create
assignments with empty page ranges, resulting in a warning being shown about
being outside of the assignment regardless of the current page.

When determining the endpoint of the CFI range for a selected chapter, find the
next CFI which is at a level equal to or above that of the selected entry _and_
refers to a different PDF page or HTML document.

Given a TOC structure like this:

```
- Chapter 1 (CFI: /2!/4, Page 2)
  - Chapter 1 - Section 1 (CFI: /2!/6, Page 3)
  - Chapter 1 - Section 12 (CFI: /2!/8, Page 4)
- Chapter 2 (CFI: /4!/2, Page 5)
```

If the user selects eg. "Chapter 1 - Section 1" the selected CFI range was
previously `/2!/6-/2!/8` but is now `/2!/6-/4!/2`. The page range shown in
the UI would previously have been `3-4` and is now `3-5`.

The reason for doing this is that the Hypothesis client discards the part of the
CFIs after the "!" and treats the endpoint of the stripped CFI as exclusive.
Therefore `/2!/6-/2!/8` is an empty range whereas `/2!/6-/4!/2` is a range
containing the whole of the document identified by `/2`.

The reason why the Hypothesis client discards the part after the "!" is because
the minimum granularity at which it operates is a single HTML document or PDF
page, identified by the part before the "!".

Fixes hypothesis/support#153.
  • Loading branch information
robertknight committed Oct 25, 2024
1 parent 9a12803 commit 8b4c583
Show file tree
Hide file tree
Showing 4 changed files with 253 additions and 51 deletions.
21 changes: 17 additions & 4 deletions lms/static/scripts/frontend_apps/components/BookPicker.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { useCallback, useMemo, useEffect, useState } from 'preact/hooks';
import type { Book, TableOfContentsEntry } from '../api-types';
import { useService, VitalSourceService } from '../services';
import type { ContentRange, Selection } from '../services/vitalsource';
import { documentCFI } from '../utils/cfi';
import { isPageRangeValid } from '../utils/vitalsource';
import BookSelector from './BookSelector';
import ErrorDisplay from './ErrorDisplay';
Expand Down Expand Up @@ -123,8 +124,17 @@ export default function BookPicker({
setStep('select-toc');
}, []);

// Return the next entry after `entry` which is at the same or higher level
// in the table of contents.
// Find the table of contents entry which comes after `entry`. This serves
// as the exclusive end point for a selection starting at `entry`.
//
// More specifically this finds the first entry after `entry` which:
//
// 1. Is at the same or higher level in the table of contents.
// 2. Refers to a different segment (HTML or PDF page) within the book.
//
// Condition (2) is needed because the minimum granularity that the Hypothesis
// client can filter annotations by is a single HTML or PDF page. Table of
// contents entries in the ePUB however can be more fine-grained than this.
const nextEntryAfter = useCallback(
(entry: TableOfContentsEntry) => {
/* istanbul ignore next - early exit should be unreachable */
Expand All @@ -133,11 +143,14 @@ export default function BookPicker({
}
const idx = tableOfContents.indexOf(entry);
const level = entry.level ?? 0;
const currentDocCFI = documentCFI(entry.cfi);

let nextEntry;
for (let i = idx + 1; i < tableOfContents.length; i++) {
const entryLevel = tableOfContents[i].level ?? 0;
if (entryLevel <= level) {
const entryAtIndex = tableOfContents[i];
const entryDocCFI = documentCFI(entryAtIndex.cfi);
const entryLevel = entryAtIndex.level ?? 0;
if (entryLevel <= level && entryDocCFI !== currentDocCFI) {
nextEntry = tableOfContents[i];
break;
}
Expand Down
176 changes: 129 additions & 47 deletions lms/static/scripts/frontend_apps/components/test/BookPicker-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ const fakeBookData = {
title: 'Book Two',
cover_image: 'https://bookstore.com/covers/book2.jpg',
},
book3: {
id: 'book3',
title: 'Book Three',
cover_image: 'https://bookstore.com/covers/book3.jpg',
},
},

chapters: {
Expand Down Expand Up @@ -49,6 +54,31 @@ const fakeBookData = {
page: '7',
},
],

book3: [
// EPUB book which is divided into two HTML pages. For the first page,
// there are multiple TOC entries.
{
title: 'Chapter A',
cfi: '/2',
page: '3',
},
{
title: 'Chapter A - Part 1',
cfi: '/2!/2',
page: '3',
},
{
title: 'Chapter A - Part 2',
cfi: '/2!/4',
page: '4',
},
{
title: 'Chapter B',
cfi: '/4',
page: '7',
},
],
},
};

Expand Down Expand Up @@ -88,9 +118,9 @@ describe('BookPicker', () => {
$imports.$restore();
});

const selectBook = wrapper => {
const selectBook = (wrapper, bookId = 'book1') => {
const bookSelector = wrapper.find('BookSelector');
const book = fakeBookData.books.book1;
const book = fakeBookData.books[bookId];

act(() => {
bookSelector.props().onSelectBook(book);
Expand Down Expand Up @@ -236,61 +266,113 @@ describe('BookPicker', () => {
});
});

it('displays page numbers corresponding to selected chapter', async () => {
[
// Select a TOC entry which corresponds to a whole HTML document.
{
book: 'book1',
chapter: 0,
expectedEndChapter: 1,
expectedStartPage: '1',
expecteEndPage: '10',
},
// Select a TOC entry which corresponds to a whole HTML document, and has
// child entries.
{
book: 'book3',
chapter: 0,
expectedEndChapter: 3,
expectedStartPage: '3',
expecteEndPage: '7',
},
// Select a TOC entry which corresponds to part of an HTML document. The
// selection should be expanded to the whole page.
{
book: 'book3',
chapter: 1, // Chapter A - Part 1
expectedEndChapter: 3,
expectedStartPage: '3',
expecteEndPage: '7',
},
].forEach(
({
book,
chapter,
expectedEndChapter,
expectedStartPage,
expecteEndPage,
}) => {
it('selects expected page number and CFI range when TOC entry is clicked', async () => {
const onSelectBook = sinon.stub();
const picker = renderBookPicker({
allowPageRangeSelection: true,
onSelectBook,
});

selectBook(picker, book);
clickSelectButton(picker);

await waitForTableOfContents(picker);
selectChapter(picker, chapter);

// Check page range displayed after selecting entry.
const startInput = picker.find('input[data-testid="start-page"]');
const endInput = picker.find('input[data-testid="end-page"]');
assert.equal(startInput.prop('placeholder'), expectedStartPage);
assert.equal(endInput.prop('placeholder'), expecteEndPage);

// Check selection passed to callback when submitting form.
clickSelectButton(picker);
await waitFor(() => onSelectBook.called);

const startEntry = fakeBookData.chapters[book][chapter];
const endEntry = fakeBookData.chapters[book][expectedEndChapter];
const bookData = fakeBookData.books[book];

assert.calledWith(
onSelectBook,
{
book: bookData,
content: {
type: 'toc',
start: startEntry,
end: endEntry,
},
},
`vitalsource://books/bookID/${bookData.id}/cfi/${startEntry.cfi}`,
);
});
},
);

// This test covers only the case where `allowPageRangeSelection` is false.
// The case where it is true is handled above.
it('invokes `onSelectBook` callback after a book and chapter are chosen', async () => {
const onSelectBook = sinon.stub();
const picker = renderBookPicker({
allowPageRangeSelection: true,
allowPageRangeSelection: false,
onSelectBook,
});

selectBook(picker);
const book = selectBook(picker);
clickSelectButton(picker);

await waitForTableOfContents(picker);
selectChapter(picker);

const startInput = picker.find('input[data-testid="start-page"]');
const endInput = picker.find('input[data-testid="end-page"]');
assert.equal(startInput.prop('placeholder'), '1');
assert.equal(endInput.prop('placeholder'), '10');
});

[
{ allowPageRangeSelection: true },
{ allowPageRangeSelection: false },
].forEach(({ allowPageRangeSelection }) => {
it('invokes `onSelectBook` callback after a book and chapter are chosen', async () => {
const onSelectBook = sinon.stub();
const picker = renderBookPicker({
allowPageRangeSelection,
onSelectBook,
});

const book = selectBook(picker);
clickSelectButton(picker);

await waitForTableOfContents(picker);
const chapter = selectChapter(picker);
clickSelectButton(picker);
const chapter = selectChapter(picker);
clickSelectButton(picker);

const expectedEnd = allowPageRangeSelection
? fakeBookData.chapters.book1[1] // Start of chapter after selection
: undefined;

await waitFor(() => onSelectBook.called);
assert.calledWith(
onSelectBook,
{
book,
content: {
type: 'toc',
start: chapter,
end: expectedEnd,
},
await waitFor(() => onSelectBook.called);
assert.calledWith(
onSelectBook,
{
book,
content: {
type: 'toc',
start: chapter,
end: undefined,
},
'vitalsource://books/bookID/book1/cfi//1',
);
});
},
'vitalsource://books/bookID/book1/cfi//1',
);
});

it('invokes `onSelectBook` callback after a book and page range are chosen', async () => {
Expand Down
68 changes: 68 additions & 0 deletions lms/static/scripts/frontend_apps/utils/cfi.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/**
* Functions for working with EPUB Canonical Fragment Identifiers.
*
* See https://idpf.org/epub/linking/cfi/.
*/

/**
* Strip assertions from a Canonical Fragment Identifier.
*
* Assertions are `[...]` enclosed sections which act as checks on the validity
* of numbers but do not affect the sort order.
*
* @example
* stripCFIAssertions("/6/14[chap05ref]") // returns "/6/14"
*/
export function stripCFIAssertions(cfi: string): string {
// Fast path for CFIs with no assertions.
if (!cfi.includes('[')) {
return cfi;
}

let result = '';

// Has next char been escaped?
let escaped = false;

// Are we in a `[...]` assertion section?
let inAssertion = false;

for (const ch of cfi) {
if (!escaped && ch === '^') {
escaped = true;
continue;
}

if (!escaped && ch === '[') {
inAssertion = true;
} else if (!escaped && inAssertion && ch === ']') {
inAssertion = false;
} else if (!inAssertion) {
result += ch;
}

escaped = false;
}

return result;
}

/**
* Return a slice of `cfi` up to the first step indirection [1], with assertions
* removed.
*
* A typical CFI consists of a path within the table of contents to indicate
* a content document, a step indirection ("!"), then the path of an element
* within the content document. For such a CFI, this function will retain only
* the content document path.
*
* [1] https://idpf.org/epub/linking/cfi/#sec-path-indirection
*
* @example
* documentCFI('/6/152[;vnd.vst.idref=ch13_01]!/4/2[ch13_sec_1]') // Returns "/6/152"
*/
export function documentCFI(cfi: string): string {
const stripped = stripCFIAssertions(cfi);
const sepIndex = stripped.indexOf('!');
return sepIndex === -1 ? stripped : stripped.slice(0, sepIndex);
}
39 changes: 39 additions & 0 deletions lms/static/scripts/frontend_apps/utils/test/cfi-test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import { documentCFI, stripCFIAssertions } from '../cfi';

describe('cfi', () => {
describe('stripCFIAssertions', () => {
it('returns CFI without assertions unchanged', () => {
assert.equal(stripCFIAssertions('/1/2/3/10'), '/1/2/3/10');
});

it('removes assertions from CFI', () => {
assert.equal(stripCFIAssertions('/1/2[chap4ref]'), '/1/2');
assert.equal(
stripCFIAssertions('/1[part1ref]/2[chapter2ref]/3[subsectionref]'),
'/1/2/3',
);
});

it('ignores escaped characters', () => {
assert.equal(stripCFIAssertions('/1/2[chap4^[ignoreme^]ref]'), '/1/2');
assert.equal(stripCFIAssertions('/1/2[a^[b^]]/3[c^[d^]]'), '/1/2/3');
});
});

describe('documentCFI', () => {
it('returns part of CFI before first step indirection', () => {
// Typical CFI with one step indirection.
assert.equal(documentCFI('/2/4/8!/10/12'), '/2/4/8');

// Rarer case of CFI with multiple step indirections.
assert.equal(documentCFI('/2/4/8!/10/12!/2/4'), '/2/4/8');
});

it('strips assertions', () => {
assert.equal(
documentCFI('/6/152[;vnd.vst.idref=ch13_01]!/4/2[ch13_sec_1]'),
'/6/152',
);
});
});
});

0 comments on commit 8b4c583

Please sign in to comment.