diff --git a/convert-anystyle-data/schema/gold_standard.xml b/convert-anystyle-data/schema/gold_standard.xml index 1e7f36993232f43684b4191bfef95465d9c232c8..3a09319dbc7caae5103c019520de8e824b3ba5f2 100644 --- a/convert-anystyle-data/schema/gold_standard.xml +++ b/convert-anystyle-data/schema/gold_standard.xml @@ -1,28 +1,37 @@ <dataset> - <!-- alternative tag names: <feature>, <predictor> --> <instance source="doi:10.1111/1467-6478.00080"> - <input type="raw"> - 3 R. Goff, ‘The Search for Principle’ (1983) Proceeedings of the British Academy 169, at 171. This is an + <input> + <!-- This is the data from which the output data is predicted by the model/algorithm --> + <!-- Alternative tag names: <feature>, <predictor> --> + 3 See R. Goff, ‘The Search for Principle’ (1983) Proceeedings of the British Academy 169, at 171. This is an amplification of Dicey’s remark that ‘[b]y adequate study and careful thought whole departments of law can . . . be reduced to order and exhibited under the form of a few principles which sum up the effect of a hundred cases . . .’. A. Dicey, Can English Law be taught at the Universities? (1883) 20. </input> - <input type="segmented"> - <!-- Alternative: Use TEI: <p><bibl>...</bibl><seg>...</seg><bibl>...</bibl>? --> - <!-- Alternative with less semantics: use only separators between bibl/non-bibl sections, this would - require output with non-bibl result --> - <bibl>3 R. Goff, ‘The Search for Principle’ (1983) Proceeedings of the British Academy 169, + <output type="binary" xmlns="http://www.tei-c.org/ns/1.0"> + <!-- Alternative tag names: <target>, <label> --> + <!-- This output type simply segments the input into parts which contain bibliographic data and those which do not. 
+ The parts that contain bibliographic data can individually be fed into processors which cannot predict + this distinction themselves, but expect an isolated instance of a bibliographic item to be + segmented into its individual parts (such as Grobid). --> + <!-- is type="binary" the right name? --> + <!-- It's important to keep in mind that some non-bibliographic data is _outside_ of the bibliographic parts, + whereas others are _inside_ (example required), i.e. need to be nested inside the <bibl> so as not to + split a coherent bibliographic item into two incomplete ones --> + <seg>3 See</seg> + <bibl>R. Goff, ‘The Search for Principle’ (1983) Proceeedings of the British Academy 169, at 171. </bibl> - <nobibl>This is an amplification of Dicey’s remark that ‘[b]y adequate study and careful + <seg>This is an amplification of Dicey’s remark that ‘[b]y adequate study and careful thought whole departments of law can . . . be reduced to order and exhibited under the form of a few principles which sum up the effect of a hundred cases . . .’. - </nobibl> + </seg> <bibl>A. Dicey, Can English Law be taught at the Universities? (1883) 20.</bibl> - </input> - <!-- alternative tag names: <target> <label> --> + </output> <output type="bibl"> <bibl xmlns="http://www.tei-c.org/ns/1.0"> + <seg type="footnote-number">3</seg> + <seg type="signal">See</seg> <author> <persName> <forename>R.</forename> @@ -40,11 +49,11 @@ <biblScope unit="page" from="at">at 171</biblScope> . </bibl> - <seg type="comment">This is an amplification of Dicey’s remark that ‘[b]y adequate study and careful - thought whole departments of law can . . . 
be reduced to order and exhibited under the form of a + few principles which sum up the effect of a hundred cases . . .’. + </seg> <author> <persName> <forename>A.</forename>