Export Transformer

With the export transformer, you can export almost anything out of Datasaur. Your new export transformer will have this template:

/**
 * Export transformer entry point.
 *
 * Keep this exact shape: a single arrow function that receives the document
 * as an `Exportable` and returns a string.
 */
(document: Exportable): string => {
  /// Implement export function here
  // Placeholder implementation: joins the `content` of every cell in
  // `document.cells` with newline characters.
  return document.cells.map((cell) => cell.content).join('\n');
};

Sample case

This example shows how to export span labels in a format compatible with Google AutoML. The file transformer script, written in TypeScript, is shown below:

/**
 * Builds a lookup table from a cell's `line` value to the cell itself.
 *
 * When several cells share the same `line`, the one that appears last in
 * `cells` is the one kept in the map.
 *
 * @param cells Cells to index.
 * @returns A map keyed by `cell.line`.
 */
function getCellMap(cells: Cell[]) {
  return cells.reduce(
    (byLine, cell) => byLine.set(cell.line, cell),
    new Map<number, Cell>()
  );
}

/**
 * Flattens every label set into a single lookup table of label items.
 *
 * Items are registered in the order they are encountered, so if two items
 * (in the same or in different label sets) share an `id`, the later one
 * replaces the earlier one.
 *
 * @param labelSets Label sets whose `labelItems` should be indexed.
 * @returns A map keyed by `labelItem.id`.
 */
function getLabelSetMap(labelSets: LabelSet[]) {
  const itemsById = new Map<string, LabelItem>();
  const register = (item: LabelItem) => {
    itemsById.set(item.id, item);
  };
  labelSets.forEach(({ labelItems }) => labelItems.forEach(register));
  return itemsById;
}

/**
 * Converts a label's token/character position into character offsets within
 * the text obtained by joining `cell.tokens` with single spaces.
 *
 * - `start_offset` is the position of the label's first character.
 * - `end_offset` is one past the position of the label's last character
 *   (`endCharIndex` is treated as inclusive, hence the `+ 1`).
 *
 * Both offsets default to 0 and are only filled in when the walk over the
 * tokens actually reaches the corresponding token index.
 *
 * @param label Label carrying `startTokenIndex`, `endTokenIndex`,
 *              `startCharIndex` and `endCharIndex`.
 * @param cell  Cell whose `tokens` the label indexes into.
 * @returns An object with `end_offset` and `start_offset`, in that key order.
 */
function convertOffset(label: SimpleLabel, cell: Cell) {
  const { startTokenIndex, endTokenIndex, startCharIndex, endCharIndex } = label;
  // Key order matters: it is the order in which the keys get serialized.
  const offset = { "end_offset": 0, "start_offset": 0 };

  // Character position at which the token currently being visited begins.
  let tokenStart = 0;
  let tokenIndex = 0;
  while (tokenIndex <= endTokenIndex) {
    if (tokenIndex == startTokenIndex) {
      offset.start_offset = tokenStart + startCharIndex;
    }
    if (tokenIndex == endTokenIndex) {
      offset.end_offset = tokenStart + endCharIndex + 1;
      break;
    }
    // Skip over this token plus the single space that separates it from the next.
    tokenStart += cell.tokens[tokenIndex].length + 1;
    tokenIndex += 1;
  }
  return offset;
}

/**
 * Serializes a value as single-line JSON that keeps one space after every
 * `:` and `,` (e.g. `{"a": [1, 2], "b": "x"}`).
 *
 * The value is first pretty-printed with `JSON.stringify`, which puts every
 * bracket, property and array element on its own line, and the lines are
 * then glued back together. Only the line structure is inspected, so the
 * content of string values is never modified, even when it contains
 * sequences such as `{ `, ` }`, `[ ` or ` ]`.
 *
 * @param obj Any JSON-serializable value.
 * @returns The compact, single-line JSON text.
 * @throws TypeError if `JSON.stringify` yields no text for `obj`
 *         (for example when `obj` is `undefined`).
 */
function stringifyWithSpaces(obj: unknown): string {
  // JSON escapes line breaks inside strings, so every "\n" in the
  // pretty-printed text is structural and splitting on it is safe.
  const lines = JSON.stringify(obj, null, 1)
    .split("\n")
    .map((line) => line.replace(/^ +/, ""));

  let result = "";
  lines.forEach((line, index) => {
    if (index > 0) {
      // A line ending in "{" or "[" is always an opening bracket: scalar
      // values end in a quote, a digit, a letter or a comma.
      const followsOpening = /[\[{]$/.test(lines[index - 1]);
      // A line starting with "}" or "]" is always a closing bracket: string
      // values and property names start with a quote.
      const isClosing = /^[\]}]/.test(line);
      if (!followsOpening && !isClosing) {
        result += " ";
      }
    }
    result += line;
  });
  return result;
}

/**
 * Export transformer entry point: turns the document's span labels into
 * JSON Lines, one JSON object per labeled cell.
 *
 * Each output line has the shape
 * `{"annotations": [...], "text_snippet": {"content": "<cell text>"}}`,
 * where the cell text is the cell's tokens joined with single spaces and
 * every annotation carries the label's character offsets and label name.
 * Lines appear in the order in which each cell is first referenced by a
 * label; cells without any label produce no output.
 *
 * Labels that reference a label item or a cell that is not present in the
 * document are skipped instead of aborting the whole export.
 *
 * This function should be written as this template and return string.
 */
(document: Exportable): string => {
  type TextSegment = { end_offset: number; start_offset: number };
  type Annotation = {
    text_extraction: { text_segment: TextSegment };
    display_name: string;
  };
  type Example = {
    annotations: Annotation[];
    text_snippet: { content: string };
  };

  const cellMap = getCellMap(document.cells);
  const labelSetMap = getLabelSetMap(document.labelSets);
  // Keyed by the line of the cell in which the label starts.
  const examplesMap = new Map<number, Example>();

  document.labels.forEach(label => {
    const labelItem = labelSetMap.get(label.labelSetItemId);
    const cell = cellMap.get(label.startCellLine);
    if (labelItem === undefined || cell === undefined) {
      // Dangling reference: there is nothing meaningful to export for this label.
      return;
    }

    const annotation: Annotation = {
      "text_extraction": {"text_segment": convertOffset(label, cell)},
      "display_name": labelItem.labelName
    };

    const example = examplesMap.get(label.startCellLine);
    if (example !== undefined) {
      // The stored object is mutated in place, so no re-insertion is needed.
      example.annotations.push(annotation);
    } else {
      examplesMap.set(label.startCellLine, {
        "annotations": [annotation],
        "text_snippet": {"content": cell.tokens.join(' ')}
      });
    }
  });

  return Array.from(examplesMap.values(), (example) => stringifyWithSpaces(example)).join('\n');
};

Follow these steps to upload and export using a custom export transformer:

  1. Go to the File transformers page.

  2. Click Create file transformer.

  3. Enter a name, select Export as the purpose, then click Create.

  4. Paste the file transformer script into the editor. You can also upload it.

  5. Go to the Projects page and open the project you want to export.

  6. Click File > Export file.

  7. In the Format field, select Custom Format (via File Transformer).

  8. In the Export as field, add the .jsonl extension at the end of the file name.

  9. In the File transformer field, select the file transformer you just created.

If you have any questions, please reach out to the Datasaur support team.

Last updated