omkarenator committed on
Commit
e137e27
1 Parent(s): 0c82fbb

initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +50 -35
  2. Dockerfile +10 -0
  3. common.py +7 -0
  4. curated.py +274 -0
  5. data/.gitattributes +1 -0
  6. data/__pycache__/non_web_urls.cpython-310.pyc +0 -0
  7. data/__pycache__/url_blocklist.cpython-310.pyc +0 -0
  8. data/all_signals.json +3 -0
  9. data/bad_url_doc.jsonl +3 -0
  10. data/cluster_dist.json +3 -0
  11. data/cluster_sizes_100.json +3 -0
  12. data/curated_samples/arxiv_extract.json +0 -0
  13. data/curated_samples/arxiv_raw.json +0 -0
  14. data/curated_samples/dm_maths_extract.json +32 -0
  15. data/curated_samples/dm_maths_raw.json +42 -0
  16. data/curated_samples/europarl_raw.json +0 -0
  17. data/curated_samples/filter_details.csv +11 -0
  18. data/curated_samples/freelaw_extract.json +32 -0
  19. data/curated_samples/freelaw_raw.json +0 -0
  20. data/curated_samples/pg19_raw.json +0 -0
  21. data/curated_samples/philpapers_raw.json +0 -0
  22. data/curated_samples/pubmed_extract.json +52 -0
  23. data/curated_samples/pubmed_raw.json +0 -0
  24. data/curated_samples/s2orc_abstract_raw.json +162 -0
  25. data/curated_samples/s2orc_raw.json +0 -0
  26. data/curated_samples/stackexchange_extract.json +0 -0
  27. data/curated_samples/stackexchange_raw.json +242 -0
  28. data/curated_samples/wiki.json +0 -0
  29. data/dataset_details.csv +3 -0
  30. data/dataset_inclusion.csv +3 -0
  31. data/dataset_inclusion_size.csv +3 -0
  32. data/line_info.json +3 -0
  33. data/lorem_ipsum.json +3 -0
  34. data/mbzuai-llm-us-east-1 - S3 bucket _ S3 _ us-east-1.mhtml +3 -0
  35. data/meta_non_web.py +3 -0
  36. data/non_web_urls.py +3 -0
  37. data/repeat_line_frac.jsonl +3 -0
  38. data/sample.py +3 -0
  39. data/sample_bad_urls.py +3 -0
  40. data/sample_doc_stat.json +3 -0
  41. data/sample_dup_ngram.json +3 -0
  42. data/sample_en_low.json +3 -0
  43. data/sample_java.jsonl +3 -0
  44. data/sample_non_en.json +3 -0
  45. data/sample_refinedweb_line.json +3 -0
  46. data/sample_terminal_punc.json +3 -0
  47. data/sample_top_ngram.json +3 -0
  48. data/sample_url_exclusion.json +3 -0
  49. data/sample_warc.json +3 -0
  50. data/sample_wet.json +3 -0
.gitattributes CHANGED
@@ -1,35 +1,50 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dist_assets_images_fineweb-recipe.png filter=lfs diff=lfs merge=lfs -text
2
+ pipeline.png filter=lfs diff=lfs merge=lfs -text
3
+ prep-across-diff-dump-dup-counts-global.png filter=lfs diff=lfs merge=lfs -text
4
+ prep-across-diff-years-global.png filter=lfs diff=lfs merge=lfs -text
5
+ prep-diff-buckets-global.png filter=lfs diff=lfs merge=lfs -text
6
+ prep-diff-buckets-local.png filter=lfs diff=lfs merge=lfs -text
7
+ prep-vs-dump-dup-global.png filter=lfs diff=lfs merge=lfs -text
8
+ perp-across-diff-buckets-global.png filter=lfs diff=lfs merge=lfs -text
9
+ prep-across-diff-buckets-local.png filter=lfs diff=lfs merge=lfs -text
10
+ prep-across-diff-docs-dup-count-global.png filter=lfs diff=lfs merge=lfs -text
11
+ cc.png filter=lfs diff=lfs merge=lfs -text
12
+ image3.png filter=lfs diff=lfs merge=lfs -text
13
+ image7.png filter=lfs diff=lfs merge=lfs -text
14
+ prep-diff-dump-dump-counts-local.png filter=lfs diff=lfs merge=lfs -text
15
+ prep-vs-dump-dup-local.png filter=lfs diff=lfs merge=lfs -text
16
+ 100k.png filter=lfs diff=lfs merge=lfs -text
17
+ image9.png filter=lfs diff=lfs merge=lfs -text
18
+ prep-across-diff-year-global-dup-buckets.png filter=lfs diff=lfs merge=lfs -text
19
+ data/sample_doc_stat.json filter=lfs diff=lfs merge=lfs -text
20
+ data/sample_en_low.json filter=lfs diff=lfs merge=lfs -text
21
+ data/toxic_lines.json filter=lfs diff=lfs merge=lfs -text
22
+ data/web_filter_pipeline.json filter=lfs diff=lfs merge=lfs -text
23
+ data/mbzuai-llm-us-east-1[[:space:]]-[[:space:]]S3[[:space:]]bucket[[:space:]]_[[:space:]]S3[[:space:]]_[[:space:]]us-east-1.mhtml filter=lfs diff=lfs merge=lfs -text
24
+ data/url_blocklist.py filter=lfs diff=lfs merge=lfs -text
25
+ data/sample_top_ngram.json filter=lfs diff=lfs merge=lfs -text
26
+ data/dataset_inclusion.csv filter=lfs diff=lfs merge=lfs -text
27
+ data/line_info.json filter=lfs diff=lfs merge=lfs -text
28
+ data/non_web_urls.py filter=lfs diff=lfs merge=lfs -text
29
+ data/web_pipeline_comparison.csv filter=lfs diff=lfs merge=lfs -text
30
+ data/all_signals.json filter=lfs diff=lfs merge=lfs -text
31
+ data/dataset_inclusion_size.csv filter=lfs diff=lfs merge=lfs -text
32
+ data/sample_java.jsonl filter=lfs diff=lfs merge=lfs -text
33
+ data/sample_warc.json filter=lfs diff=lfs merge=lfs -text
34
+ data/sample_wet.json filter=lfs diff=lfs merge=lfs -text
35
+ data/curated_samples filter=lfs diff=lfs merge=lfs -text
36
+ data/dataset_details.csv filter=lfs diff=lfs merge=lfs -text
37
+ data/sample.py filter=lfs diff=lfs merge=lfs -text
38
+ data/sample_dup_ngram.json filter=lfs diff=lfs merge=lfs -text
39
+ data/sample_non_en.json filter=lfs diff=lfs merge=lfs -text
40
+ data/sample_terminal_punc.json filter=lfs diff=lfs merge=lfs -text
41
+ data/__pycache__ filter=lfs diff=lfs merge=lfs -text
42
+ data/bad_url_doc.jsonl filter=lfs diff=lfs merge=lfs -text
43
+ data/cluster_dist.json filter=lfs diff=lfs merge=lfs -text
44
+ data/lorem_ipsum.json filter=lfs diff=lfs merge=lfs -text
45
+ data/repeat_line_frac.jsonl filter=lfs diff=lfs merge=lfs -text
46
+ data/sample_url_exclusion.json filter=lfs diff=lfs merge=lfs -text
47
+ data/meta_non_web.py filter=lfs diff=lfs merge=lfs -text
48
+ data/sample_bad_urls.py filter=lfs diff=lfs merge=lfs -text
49
+ data/sample_refinedweb_line.json filter=lfs diff=lfs merge=lfs -text
50
+ images/llm360_logo.png filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# Image for the app: installs requirements.txt and starts main.py.
FROM python:3.10
WORKDIR /code
# Copy the project into /code, owned by uid 1000.
COPY --link --chown=1000 . .
# World-writable scratch directory used as the Hugging Face Hub cache below.
RUN mkdir -p /tmp/cache/
RUN chmod a+rwx -R /tmp/cache/
# Point the Hub cache at the writable directory created above. The previous
# value was the literal string "HF_HOME", i.e. a relative directory of that
# name, which left /tmp/cache/ unused.
ENV HF_HUB_CACHE=/tmp/cache/
RUN pip install --no-cache-dir -r requirements.txt

# Unbuffered output so logs appear immediately; PORT is exported for the app.
ENV PYTHONUNBUFFERED=1 PORT=7860
CMD ["python", "main.py"]
common.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from fasthtml.common import *
2
+ from fasthtml.components import *
3
+
4
+
5
def common_steps():
    """Return the "Common Steps" page: a Div wrapping one titled Section."""
    heading = H2(P("Common Steps"))
    section = Section(heading, id="inner-text")
    return Div(section)
7
+
curated.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fasthtml.common import *
2
+ from fasthtml.components import *
3
+ from plotly import graph_objects as go
4
+ from fh_plotly import plotly2fasthtml
5
+ import pandas as pd
6
+ import json
7
+ from data_viewer import view_data, gen_random_id
8
+ from rich import print
9
+ import uuid
10
+
11
+
12
# Display names of the curated sources; handed to ``view_data`` with every
# sample that ``get_data`` renders.
data_sources = [
    "Freelaw",
    "Wikipedia",
    "PhilPapers",
    "Arxiv",
    "S2ORC",
    "S2ORC Abstract",
    "Pubmed",
    "USPTO",
    "Hackernews",
    "Ubuntu IRC",
    "StackExchange",
    "DM Maths",
    "PG19",
    "Europarl",
]

# Sample files per source as ``(raw_path, extracted_path)``. ``None`` for the
# extracted path means the raw samples are shown on both sides. Sources that
# appear in ``data_sources`` but not here have no sample files and are
# rendered as empty documents.
_CURATED_SAMPLE_FILES = {
    "Freelaw": (
        "data/curated_samples/freelaw_raw.json",
        "data/curated_samples/freelaw_extract.json",
    ),
    "Wikipedia": ("data/curated_samples/wiki.json", None),
    "StackExchange": (
        "data/curated_samples/stackexchange_raw.json",
        "data/curated_samples/stackexchange_extract.json",
    ),
    "PhilPapers": ("data/curated_samples/philpapers_raw.json", None),
    "Arxiv": (
        "data/curated_samples/arxiv_raw.json",
        "data/curated_samples/arxiv_extract.json",
    ),
    "S2ORC": ("data/curated_samples/s2orc_raw.json", None),
    "S2ORC Abstract": ("data/curated_samples/s2orc_abstract_raw.json", None),
    "Pubmed": (
        "data/curated_samples/pubmed_raw.json",
        "data/curated_samples/pubmed_extract.json",
    ),
    "DM Maths": (
        "data/curated_samples/dm_maths_raw.json",
        "data/curated_samples/dm_maths_extract.json",
    ),
    "PG19": ("data/curated_samples/pg19_raw.json", None),
    "Europarl": ("data/curated_samples/europarl_raw.json", None),
}

# Document index used when the requested one cannot be parsed; equals the
# default of ``get_data``'s ``doc_id`` parameter.
_FALLBACK_DOC_ID = 3
# Highest selectable document index (ten samples per source, 0-9).
_MAX_DOC_ID = 9


def _read_json(path):
    """Parse the JSON file at *path*, closing the file once it has been read."""
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


def get_data(data_source: str = "Freelaw", doc_id: int = 3, target: str = "foo"):
    """Render one raw/extracted sample pair for a curated data source.

    Args:
        data_source: Display name of the source. Names without an entry in
            ``_CURATED_SAMPLE_FILES`` (including ``None``) are rendered as
            empty documents.
        doc_id: Index of the sample to show. Anything ``int()`` accepts is
            allowed (e.g. a query-string value); it is clamped to 0-9. A
            value that cannot be converted falls back to 3 instead of
            raising.
        target: Passed through unchanged to ``view_data``.

    Returns:
        Whatever ``view_data`` returns for the selected pair.
    """
    try:
        doc_id = int(doc_id)
    except (TypeError, ValueError):
        # Query parameters arrive as free-form text; a blank or non-numeric
        # value should select the default sample rather than fail the request.
        doc_id = _FALLBACK_DOC_ID
    doc_id = max(0, min(doc_id, _MAX_DOC_ID))

    sample_files = _CURATED_SAMPLE_FILES.get(data_source)
    if sample_files is None:
        raw_sample_doc = extracted_sample_doc = [
            {} for _ in range(_MAX_DOC_ID + 1)
        ]
    else:
        raw_path, extracted_path = sample_files
        raw_sample_doc = _read_json(raw_path)
        if extracted_path is None:
            extracted_sample_doc = raw_sample_doc
        else:
            extracted_sample_doc = _read_json(extracted_path)

    return view_data(
        raw_sample_doc[doc_id],
        extracted_sample_doc[doc_id],
        doc_id=doc_id,
        data_source=data_source,
        data_sources=data_sources,
        target=target,
    )
95
+
96
+
97
def get_chart_28168342():
    """Build a funnel figure with one horizontal trace per curated source.

    Each trace plots nine values against the same nine filter stages.

    Returns:
        The populated ``go.Figure``.
    """
    stage_labels = [
        "Download",
        "Language",
        "Min word count",
        "Title Abstract",
        "Majority language",
        "Paragraph count",
        "Frequency",
        "Unigram log probability",
        "Local dedup",
    ]

    # Thirteen sources share this profile; only the first two differ.
    shared_profile = [100, 90, 80, 70, 60, 40, 40, 30, 20]
    profiles = {
        "Wikipedia": [100, 90, 80, 70, 60, 50, 40, 30, 20],
        "Freelaw": [100, 90, 80, 70, 60, 50, 40, 20, 20],
    }
    for shared_name in (
        "DM Maths",
        "USPTO",
        "PG19",
        "Hackernews",
        "Ubuntu IRC",
        "Europarl",
        "StackExchange",
        "Arxiv",
        "S2ORC",
        "S2ORC Abstract",
        "PubMed Central",
        "PubMed Central Abstract",
        "PhilPapers",
    ):
        profiles[shared_name] = list(shared_profile)

    trace_options = {
        "orientation": "h",
        "textinfo": "value+percent total",
        "textposition": "inside",
    }

    figure = go.Figure()
    for source_name, stage_values in profiles.items():
        trace = go.Funnel(
            name=source_name,
            y=stage_labels,
            x=stage_values,
            **trace_options,
        )
        figure.add_trace(trace)

    figure.update_layout(height=500, plot_bgcolor="rgba(0,0,0,0)")
    return figure
143
+
144
+
145
def update(target: str, request):
    """Re-render the sample viewer named *target* from the query string.

    Reads ``data_source_<target>`` and ``doc_id_<target>`` from
    ``request.query_params``.

    Returns:
        The result of ``get_data`` when at least one of the two parameters
        has a non-empty value, otherwise ``None``.
    """
    query = request.query_params
    source_key = f"data_source_{target}"
    doc_key = f"doc_id_{target}"

    chosen_source = query.get(source_key)
    if chosen_source:
        # A missing doc id selects sample 3.
        return get_data(chosen_source, query.get(doc_key, 3), target)

    chosen_doc = query.get(doc_key)
    if not chosen_doc:
        return None
    # Only a doc id was supplied: the absent/empty source is forwarded as-is.
    return get_data(chosen_source, chosen_doc, target)
153
+
154
+
155
def curated(request):
    """Assemble the "Curated Sources" page.

    The page holds, in order: the funnel chart, a "Data Preparation" part
    (intro text, a table of acquisition methods and a bordered sample viewer)
    and a "Data Preprocessing" part (intro text and a table of filter steps).

    Args:
        request: Accepted for the handler signature; not read here.

    Returns:
        A ``Div`` wrapping a single ``Section`` with id ``inner-text``.
    """

    def table_block(frame):
        # Render a DataFrame as an index-free, borderless HTML table.
        markup = frame.to_html(index=False, border=0)
        return Div(NotStr(markup), style="margin: 40px;")

    # --- Data preparation -------------------------------------------------
    acquisition_methods = pd.DataFrame(
        [
            (
                "HTTP/FTP dumps",
                "Acquiring data from HTTP/FTP dumps",
                "Freelaw | Wikipedia | PhilPapers | Arxiv | S2ORC | Pubmeds",
            ),
            (
                "Web crawling",
                "Crawling websites to extract data",
                "USPTO | Hackernews | Ubuntu IRC",
            ),
            ("Archive snapshot", "Working with archive dumps", "StackExchange"),
            ("Generated", "Generating synthetic data", "DM Maths"),
            ("Curated", "High quality curated data", "PG19 | Europarl"),
        ],
        columns=["Method", "Description", "Source"],
    )
    preparation_table = table_block(acquisition_methods)

    preparation_intro = P("""This initial stage serves as the foundation for the entire
    process. Here, we focus on acquiring and extracting the raw data, which can
    come from various sources such as crawling websites, using HTTP/FTP dumps,
    or working with archive dumps. For instance, to download and prepare a
    dataset, we can specific downloaders based on the data source. Each dataset
    might have its own downloader script which can be updated in real time to
    handle changes in the data source. Here is a general outline of the data
    preparation process: It's worth noting that some pipelines might require
    invoking additional functions or scripts to handle specific data sources or
    formats. These helper scripts can be located within specific directories
    or modules dedicated to the dataset.""")

    # Sample viewer with a freshly generated target id, framed by a border.
    sample_viewer = Div(
        get_data(target=gen_random_id()),
        style="border: 1px solid #ccc; padding: 20px;",
    )
    data_preparation_div = Div(
        H3("Data Preparation"),
        preparation_intro,
        preparation_table,
        sample_viewer,
    )

    # --- Data preprocessing -----------------------------------------------
    preprocessing_intro = P("""Data preprocessing is a crucial step in the data science
    pipeline. It involves cleaning and transforming raw data into a format that
    is suitable for analysis. This process includes handling missing values,
    normalizing data, encoding categorical variables, and more.""")

    filter_steps = pd.DataFrame(
        [
            (
                "Language Filter",
                "Filtering data based on language",
                "To remove documents in unwanted languages",
                "Improves data quality by removing irrelevant documents",
                "May exclude documents in less common languages",
            ),
            (
                "Min Word Count",
                "Setting a minimum word count threshold",
                "To filter out documents with very few words",
                "Filters out low-quality or incomplete documents",
                "May remove documents with valuable information",
            ),
            (
                "Title Abstract",
                "Extracting information from the title and abstract",
                "To extract relevant information for analysis",
                "Provides additional information for analysis",
                "May introduce bias in the analysis",
            ),
            (
                "Majority Language",
                "Identifying the majority language in the dataset",
                "To understand the distribution of languages in the dataset",
                "Enables language-specific analysis and insights",
                "May not accurately represent the language distribution",
            ),
            (
                "Paragraph Count",
                "Counting the number of paragraphs in each document",
                "To analyze the structure and length of documents",
                "Helps understand the complexity and content of documents",
                "May not capture the complexity of document structure",
            ),
            (
                "Frequency",
                "Calculating the frequency of each word in the dataset",
                "To identify important words in the dataset",
                "Identifies important terms and topics in the dataset",
                "May be sensitive to noise and outliers",
            ),
            (
                "Unigram Log Probability",
                "Calculating the log probability of each unigram",
                "To measure the significance of individual words",
                "Quantifies the importance of individual words",
                "May not capture the semantic meaning of words",
            ),
        ],
        columns=["Step", "Description", "Need", "Pros", "Cons"],
    )
    data_preprocessing_div = Div(
        H3("Data Preprocessing"),
        preprocessing_intro,
        table_block(filter_steps),
    )

    page_section = Section(
        H2("Curated Sources"),
        plotly2fasthtml(get_chart_28168342()),
        data_preparation_div,
        data_preprocessing_div,
        id="inner-text",
    )
    return Div(page_section)
data/.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ cluster_sizes_100.json filter=lfs diff=lfs merge=lfs -text
data/__pycache__/non_web_urls.cpython-310.pyc ADDED
Binary file (569 Bytes). View file
 
data/__pycache__/url_blocklist.cpython-310.pyc ADDED
Binary file (1.19 kB). View file
 
data/all_signals.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffe5794345282411ceb9d6b8c9dddb88dc428370297e92f4e991f7fb8aee0945
3
+ size 595
data/bad_url_doc.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f990b8dd33a1bce672e32a6f7f51d748dd2b692e38c308672d1616e22f38f67c
3
+ size 466015
data/cluster_dist.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a2fe2ebf2a998c68a4174972849b428947ff30048380c5f8135948753d53f80
3
+ size 503915
data/cluster_sizes_100.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9524264b68715301a4a7fb7f7f254189a7404e45a68ed519c4231cce2877268b
3
+ size 130012
data/curated_samples/arxiv_extract.json ADDED
The diff for this file is too large to render. See raw diff
 
data/curated_samples/arxiv_raw.json ADDED
The diff for this file is too large to render. See raw diff
 
data/curated_samples/dm_maths_extract.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "text": "Question: b'Simplify (q**(-12))**(-1\\/15)\\/(q**(4\\/9)*q*q*q**(-3)) assuming q is positive.\\n'\\nAnswer: b'q**(61\\/45)\\n'"
4
+ },
5
+ {
6
+ "text": "Question: b'In base 5, what is 10 - 3311121?\\n'\\nAnswer: b'-3311111\\n'"
7
+ },
8
+ {
9
+ "text": "Question: b'Which is bigger: 1 or 3\\/5611166?\\n'\\nAnswer: b'1\\n'"
10
+ },
11
+ {
12
+ "text": "Question: b'Total of 0.06 and -1977321735.\\n'\\nAnswer: b'-1977321734.94\\n'"
13
+ },
14
+ {
15
+ "text": "Question: b'Calculate prob of sequence ml when two letters picked without replacement from {k: 5, l: 2, h: 1, q: 7, m: 5}.\\n'\\nAnswer: b'1\\/38\\n'"
16
+ },
17
+ {
18
+ "text": "Question: b'Divide -380649 by 1.\\n'\\nAnswer: b'-380649\\n'"
19
+ },
20
+ {
21
+ "text": "Question: b'What is 87000.45l in millilitres?\\n'\\nAnswer: b'87000450\\n'"
22
+ },
23
+ {
24
+ "text": "Question: b'Which is the nearest to -1\\/4? (a) 2\\/15 (b) 1\\/2 (c) 4.1 (d) 3\\n'\\nAnswer: b'a\\n'"
25
+ },
26
+ {
27
+ "text": "Question: b'Let l(g) be the third derivative of g**8\\/20160 + g**7\\/2520 + g**5\\/30 + 23*g**2. Let m(f) be the third derivative of l(f). What is m(-3)?\\n'\\nAnswer: b'3\\n'"
28
+ },
29
+ {
30
+ "text": "Question: b'What is the hundred thousands digit of 22225209?\\n'\\nAnswer: b'2\\n'"
31
+ }
32
+ ]
data/curated_samples/dm_maths_raw.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "question": "b'Simplify (q**(-12))**(-1\/15)\/(q**(4\/9)*q*q*q**(-3)) assuming q is positive.\\n'",
4
+ "answer": "b'q**(61\/45)\\n'"
5
+ },
6
+ {
7
+ "question": "b'In base 5, what is 10 - 3311121?\\n'",
8
+ "answer": "b'-3311111\\n'"
9
+ },
10
+ {
11
+ "question": "b'Which is bigger: 1 or 3\/5611166?\\n'",
12
+ "answer": "b'1\\n'"
13
+ },
14
+ {
15
+ "question": "b'Total of 0.06 and -1977321735.\\n'",
16
+ "answer": "b'-1977321734.94\\n'"
17
+ },
18
+ {
19
+ "question": "b'Calculate prob of sequence ml when two letters picked without replacement from {k: 5, l: 2, h: 1, q: 7, m: 5}.\\n'",
20
+ "answer": "b'1\/38\\n'"
21
+ },
22
+ {
23
+ "question": "b'Divide -380649 by 1.\\n'",
24
+ "answer": "b'-380649\\n'"
25
+ },
26
+ {
27
+ "question": "b'What is 87000.45l in millilitres?\\n'",
28
+ "answer": "b'87000450\\n'"
29
+ },
30
+ {
31
+ "question": "b'Which is the nearest to -1\/4? (a) 2\/15 (b) 1\/2 (c) 4.1 (d) 3\\n'",
32
+ "answer": "b'a\\n'"
33
+ },
34
+ {
35
+ "question": "b'Let l(g) be the third derivative of g**8\/20160 + g**7\/2520 + g**5\/30 + 23*g**2. Let m(f) be the third derivative of l(f). What is m(-3)?\\n'",
36
+ "answer": "b'3\\n'"
37
+ },
38
+ {
39
+ "question": "b'What is the hundred thousands digit of 22225209?\\n'",
40
+ "answer": "b'2\\n'"
41
+ }
42
+ ]
data/curated_samples/europarl_raw.json ADDED
The diff for this file is too large to render. See raw diff
 
data/curated_samples/filter_details.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Component,Filtered out %,No Dup %,2 to 5 duplicates,6 to 10 duplicates,11 to 100 duplicates,101 to 1000 duplicates,1001+ duplicates
2
+ Papers,15,75.99,19.40,2.89,1.71,0.01,<0.01
3
+ Wikipedia,21,91.91,4.70,1.58,1.76,0.05,<0.01
4
+ Stack Exchange,<0.1,98.02,1.27,0.35,0.35,0.01,<0.01
5
+ EuroParl,1,98.87,0.94,0.09,0.10,0,0
6
+ Ubuntu IRC,0.4,100,0,0,0,0,0
7
+ HackerNews,60,99.91,0.05,0.02,0.02,<0.01,<0.01
8
+ PG19,0.8,31.81,20.03,24.27,22.26,1.58,0.06
9
+ USPTO,22.5,99.94,0.05,0.01,0.01,<0.01,0
10
+ Freelaw,94,91.01,6.87,1.07,1.05,0.01,0
11
+ DM Maths,0,0,0,0,0,0,0
data/curated_samples/freelaw_extract.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "text": "\nWachenfeld, J.\n(dissenting). In In re Woodworth, 15 Fed. Supp. 291; affirmed, 85 F. 2d 50 (C. C. A. 2, 1936), the court held:\n\u201cOn principle, it cannot be doubted that when an attorney makes an agreement to prosecute a case for a fee contingent on success, and is disbarred before the fee is earned, he may not collect compensation from his client for the work done. The agreed fee he cannot have, because he has not performed his engagement and the contingency on which the compensation was to rest has not happened. Reasonable compensation in lieu of the fee he cannot have, because his inability to complete his contract has been brought about by his own wrongful \u25a0conduct.\u201d\nI subscribe to this reasoning and conclusion and am therefore to affirm.\nAdopting this rule would not complicate or bring economic \u25a0considerations into disciplinary proceedings nor would it defeat their purpose. It would, in my opinion, be an added incentive to professional conduct, which is foreign to disciplinary complaints.\n*530Admittedly, the plaintiff was disbarred because of his own wrongful act, and whether it was with reference to this particular case or not, the result, in my opinion, is the same.\nThe penalty falls and he can no longer represent his client because of his wrongful conduct. The result of that misconduct should be uniform, not varying with the degree of culpability or its relationship to any particular case.\n\u201cI-Iis inability to complete his contract has been brought about by his own wrongful conduct.\u201d\nI would affirm the judgment.\nFor reversal \u2014 Chief Justice Vandeebilt, and Justices Case, Heiiek, Olephant, Bueling and Ackekson \u2014 6.\nFor affirmance \u2014 Justice Wachenfeld \u2014 1.\n"
4
+ },
5
+ {
6
+ "text": "\n\n THE COURT.\n \n\n By mandate, petitioner seeks an order of this court compelling the Superior Court in and for the County of Sacramento to set aside and vacate its order denying petitioner\u2019s motion to quash service of summons and complaint\n \n *703\n \n on petitioner. This court issued its order to show cause and the matter was set for oral argument.\n \n\n After full consideration of the petition, the points and authorities, and the arguments of respective counsel, we are satisfied that the memorandum opinion filed by the Honorable Leonard M. Friedman denying a like motion correctly states the law and we therefore adopt the same as the opinion of this court.\n \n\n \u201cThe court has concluded that defendant Regie Nationals des Usines Renault, Billancourt (Seine), France (hereinafter referred to elliptieally as \u2018Regie\u2019) has had adequate contacts with California and California residents so that the maintenance of the suit against it does not offend our notions of fair play and substantial justice\n \n (International Shoe Co.\n \n v.\n \n Washington,\n \n 326 U.S. 310 [66 S.Ct. 154, 90 L.Ed. 95, 161 A.L.R. 1057]). It is, in other words, 'doing business\u2019 in California and is amenable to substituted service of process on the Secretary of State in the manner provided by Section 6501-6502 of the Corporations Code.\n \n\n \u201cDefendant Regie is a business entity owned by the French government. As an automobile manufacturer, it inaugurates a flow of its products to the California market. It sells its products to defendant Renault, Inc., a wholly-owned subsidiary incorporated in the State of New York. Renault, in turn, sells to various American distributors, who in turn sell to retail dealers. There is a chain of sales leading from defendant Regie to California consumers. 
The product is such that negligence in manufacture and inspection might well cause injury to California citizens, as is alleged by the plaintiff and by the cross-complainant here.\n \n\n \u201cRegie might' choose to arrange its marketing process through a hierarchy of its own agents and employees. Then, by establishing agents in California to sell its products, it would undoubtedly be amenable to suit in this state. For reasons of its own it chooses to market its products through a wholly-owned American subsidiary and a network of independently-owned distributorships and dealerships. These choices on its part effect little, if any, alteration in the jurisdictional situation. The \u2018contacts\u2019 exist one way or the other and for precisely the same purposes. The differences are differences only in form and description.\n \n\n \u201cApparently, where the tort occurs within the state, extensive sales and promotional contacts with California consumers\n \n *704\n \n through nonexclusive, independent sales representatives may constitute \u2018doing business\u2019\n \n (Cosper\n \n v.\n \n Smith & Wesson Arms Co.,\n \n 53 Cal.2d 77 [346 P.2d 409]). Here there are additional circumstances which, in composite, impel subjection to jurisdiction. These are: (a) the interest of this State in providing a forum for its residents; (b) the relative availability of evidence; (c) the relative burden of defense and prosecution in California rather than at some other place; (d) the ease of access to some alternative forum; (e) the extent to which the cause of action arises out of Regie\u2019s local activities.\n \n Fisher Governor Co.\n \n v.\n \n Superior Court,\n \n 53 Cal.2d 222, 225-226 [1 Cal.Rptr. 
1, 347 P.2d 1].\n \n\n \u201cAs regards \u2018fair play\u2019 it is obvious that if California rejects jurisdiction, Regie may successfully bar plaintiff and cross-complainant from access to the courts of all states of the union, including New York.\n \n Cannon Manufacturing Co.\n \n v.\n \n Cudahy Packing Co.,\n \n 267 U.S. 333 [45 S.Ct. 250, 69 L.Ed.\n \n 634];\n \n see also\n \n Fisher Governor Go.\n \n v.\n \n Superior Court, supra.\n \n Regie\u2019s argument would, in effect, confine the claimants to the courts of the Republic of France. Fairness to Regie does not entail this disadvantage to the claimants.\n \n\n \u201cRenault, Inc., whether regarded as an individual corporate entity or as\n \n alter ego\n \n of Regie, is simply a medium through which the latter establishes its business contacts with the California public. As to the mechanics of process serving, Regie has received process via the California Secretary of State without reference to the \u2018presence\u2019 of Renault as its purported agent in California. Thus there is no point in deciding whether to respect the separate status of Renault or to regard it as merely the\n \n alter ego\n \n of Regie.\u201d\n \n\n The order to show cause is discharged and the petition is denied.\n \n\n Petitioner\u2019s application for a hearing by the Supreme Court was denied December 12, 1962.\n \n"
7
+ },
8
+ {
9
+ "text": "\n\n LILLIE, J.\n \n\n Having found defendant guilty of four counts of bookmaking in violation of section 337a, subdivisions 1 and 3, Penal Code, the trial judge on February 10, 1961, sentenced\n \n *247\n \n 'him to a term of 180 days in the county jail; he suspended sentence and granted defendant probation for a period of three years on certain specified terms and conditions, among them, that defendant \u201cnot gamble or engage in any bookmaking activities or have paraphe [r] nalia thereof in his possession, and not be present in places where gambling or bookmaking is conducted,\u201d and that he \u201cobey all laws, orders, rules and regulations of the probation department and of the court. \u2019 \u2019 Shortly thereafter defendant was again arrested and charged with four counts of bookmaking; he was found guilty of a violation of section 337a, subdivision 3, Penal Code, as alleged in count 3 of the information (no. 242588). On August 22, 1961, the court denied probation and sentenced defendant to 90 days in the county jail. At the same time, and in the instant case, the court found defendant to be in violation of the probation order of February 10, 1961, and ordered the same modified to provide that he serve the next 90 days in the county jail, probation to continue under the same terms and conditions upon his release. The court ordered the jail terms in case no. 242588 and in the instant case to run concurrently. From the judgment defendant appeals.\n \n\n It appearing that defendant engaged in bookmaking activities on March 23, 1961, for which he was charged and convicted (judgment affirmed by this court on August 29, 1962,\n \n People\n \n v. Tereno, 207 Cal.App.2d 246 [24 Cal.Rptr. 501], in violation of section 337a, subdivision 3, Penal Code, and the probation order of February 10, 1961, the lower court properly found defendant to be in violation of the order, and modified the same. 
The judgment is affirmed.\n \n\n While defendant has also appealed from an order denying a motion for new trial, the record in both cases is silent concerning such a motion and no order relative to denial of a new trial appears therein. Thus, appeal from the purported order is dismissed.\n \n\n Wood, P. J., concurred.\n \n"
10
+ },
11
+ {
12
+ "text": "\n\n Opinion by\n \n\n Mb. Justice Bell,\n \n\n This is the second time the defendant was convicted of first degree murder and at each trial the jury in their verdict imposed the death penalty. The defendant in this appeal raises two narrow but very important questions: (1) \u201cMust a suspect in a capital case, prior to police questioning, be furnished counsel or at least advised of his right to counsel and to remain silent upon questioning\u201d; and (2) Did the trial judge commit reversible error in his charge to the jury concerning a man named Phillips? The first question has been ruled adversely to the appellant in a companion case,\n \n Commonwealth v.\n \n Bryant, 367 Pa. 135, 79 A. 2d 193; and we shall therefore proceed to a determination of the second question involved.\n \n\n Joseph Saturno, 65 years old, was found dead in his apartment, 616 Pemberton Street, Philadelphia, about 6:25 p.m. on November 8,1948. The deceased had been terribly beaten and mutilated, with about 20 wounds in the head; and his apartment was a shambles, with blood over everything. Detectives arrived at the scene of the crime shortly thereafter and as a result of a tip went to defendant\u2019s home at 633 Kenilworth Street, less than a block from the deceased\u2019s apartment. Defendant was shot by the detectives while he was attempting to escape and was then taken to a hospital. Detectives Leaf and O\u2019Donnell were permitted to testify, over objections of counsel, that they questioned defendant at the hospital-and that\n \n defendant admitted\n \n be\n \n *161\n \n\n ing the lookout\n \n while his co-defendant, Bryant, and another man, Phillips, beat the decedent to death.\n \n\n Phillips was then brought to the hospital and identified by defendant. Next morning defendant was taken to Detective Division Headquarters and once more questioned. 
His statements were then reduced to writing, although not signed by him, and his written statement detailing this exceptionally brutal murder, was read in evidence by Detective Steinberg, over the objections of defendant\u2019s counsel. Phillips was again implicated in the crime by defendant.\n \n\n Later the same day defendant signed, a confession similar to the ones he had made on the two previous occasions above mentioned. At this time he was warned that any statement he made would probably be used against him at the trial of his case. Once again defendant connected Phillips with the killing.\n \n\n Both Chambers and Bryant confessed that they had come to Saturno\u2019s apartment to rob him and gave details of the brutal beating administered to Saturno and how they ransacked his apartment in their search for money, but each blamed the actual beating of Saturno on the other. Bryant\u2019s confession was not used or read in defendant\u2019s trial. Defendant makes no contention that his confessions were involuntary or coerced or that he did not have a fair trial (except as to the two matters which he specifically raised in his aforesaid statement of the questions involved).\n \n\n In each of defendant\u2019s confessions he implicated, as above mentioned, a man named\n \n \u201cPhillips\u201d\n \n as having, with Bryant, beaten the deceased to death. Phillips denied this when he was arrested by the police in his bedroom about 2 a.m. 
the morning after the murder and also when Chambers accused him of it at the hospital and again at the Coroner\u2019s Inquest, as well as at defendant\u2019s trial.\n \n\n \n *162\n \n Phillips testified at defendant\u2019s trial that defendant knocked him unconscious in a crap game on the afternoon of November 8th and that defendant\u2019s mother washed the wound; that Phillips then went to the Pennsylvania Hospital at about 4:15 p.m.; that he left the hospital\n \n around 5 o\u2019clock\n \n and went to the house of Robert Magil, 638 Bainbridge Street, which is less than a block from Saturno\u2019s apartment; that he then went home, had supper and because his head hurt him, went right to bed; and that defendant lied when he said that Phillips had anything to do with the murder.\n \n\n Detective O\u2019Donnell testified that defendant (Chambers) definitely told him that Phillips got the patch on his head in his fight with Pop Saturno. Detectives O\u2019Donnell and MeColgan each testified that he had investigated these conflicting statements and found out from the hospital records and elsewhere that Phillips received the injury prior to 4:30 on the day of the murder; that they had also investigated Phillips\u2019 statement that he had nothing to do with the murder and that they had no proof that he participated in or had any connection with the murder of Saturno. The court then said: \u201cQ. Let\u2019s dispose of Phillips. Did you check to see where Phillips was from the time he was discharged from the hospital until the time he was arrested? Did you make an investigation of that matter? A. Yes, we did. He told us the same story he told on the stand here. Q. Where did you ascertain where Phillips was, home in bed? A. We had nothing to prove that he was anywhere else. . . . The Court: He told you all the discussions up to that time, which would determine the status of Phillips.\n \n Now, Phillips is' out.\n \n Q. 
All right.\n \n What happened after Phillips is eliminated?\u201d\n \n\n\n *\n \n\n\n\n \n *163\n \n It is also a fact, for what it is worth, that Phillips was exonerated by the Coroner.\n \n\n Defendant contends that the veracity of his story about Phillips was of very great importance to him because if the jury believed defendant was only a lookout and that the brutal beating had been committed by Bryant and Phillips and not by him, the sentence might have been life imprisonment instead of death. With this contention we agree. In the light of this we shall examine the court\u2019s charge to which the defendant objects.\n \n\n The court charged: \u201cThe first thing this man did was\n \n implicate an innocent man who has successfully established an alibi and who has been vindicated in the community. Me had nothing to do with it.\n \n The Commonwealth is not in possession of the slightest evidence that he was\n \n even near\n \n where the crime was committed at the time it was committed. I mention that to you because in determining what are the true facts in this case you have before you statements of this defendant, one of the\n \n two witnesses\n \n to what took place, and if he lied about Phillips you have a right to ask yourselves whether he lied about Bryant. Was he an innocent bystander or was he a participant in this brutality? Was he the man who beat and beat and beat the poor dead man until he passed out of this life, or was he the man who stood in between the doorway of the two rooms? ... So, what is the truth? That is what you must ascertain. The law says that all of those who participate in a robbery and other crimes that are enumerated in the Act of Assembly are equally guilty. ... If A and B are together and A fires the shot, B is just as guilty if he participates in one of these enumerated felonies as though he fired the shot himself. ... 
If you believe this defendant is guilty of murder in the first degree, then decide whether it should be life imprisonment or death in the electric chair.\u201d\n \n\n \n *164\n \n It is the exclusive province of the jury, not the court, to decide all the facts, the inferences therefrom, the credibility of the witnesses and the weight and effect to be given to all of the testimony. While the main purpose of a judge is to state and explain the law and briefly review the evidence, it is always the privilege and sometimes the duty of a trial judge to express his own opinion, including his opinion of the weight and effect of the evidence or its points of strength and weakness or even the guilt or innocence of the defendant and the verdict which, in his judgment, the jury should render, provided (1) there is reasonable ground for any statement he may make; and (2) he clearly leaves to the jury the right to decide all the facts and every question involved in the case, regardless of any opinion of the court thereon:\n \n Commonwealth v. Cunningham,\n \n 232 Pa. 609, 611, 81 A. 711;\n \n Commonwealth v. Foster,\n \n 364 Pa. 288, 293, 72 A. 2d 279;\n \n Commonwealth v. Simmons,\n \n 361 Pa. 391, 407, 65 A. 2d 353;\n \n Commonwealth v. Watts,\n \n 358 Pa. 92, 97, 56 A. 2d 81;\n \n Commonwealth v. Jones,\n \n 341 Pa. 541, 551, 19 A. 2d 389;\n \n Commonwealth v. Nafus,\n \n 303 Pa. 418, 420-1, 154 A. 485.\n \n\n The question in the instant case is whether the judge in his charge to the jury,\n \n by treating controversial questions as established facts,\n \n took away from the jury their right to pass upon and decide the evidence and issues presented to them, particularly with reference to Phillips? We are of the opinion that the trial judge unintentionally usurped the power of the jury and for this reason, the appeal must be sustained.\n \n\n Our conclusion is further supported by\n \n Commonwealth v. Light,\n \n 195 Pa. 220, 45 A. 933. 
The defendant in that case was indicted for larceny. We there said: \u201cThis part of the charge is however open to the objection that\n \n it states as an established fact in the case a matter which was the subject of the most serious controversy at the trial,\n \n and upon which the guilt or in\n \n *165\n \n nocence of the defendant in the minds of the jury would depend to a great extent. Whether the defendant Light assisted in taking the turkeys to the buggy was the most important question in the case in determining his guilt. If he assisted in this act, his conduct at the time, shown by the same testimony, together with his denial, and his fabrication of an entirely different account of the matter left little room for doubt. If, as he testified, the turkeys were placed in the buggy by Sholl during his absence and without his knowledge, the case against him was much weakened. The statement that if \u2018. . . at any time between the time they took these turkeys to the buggy,\u2019 etc.,\n \n was an assumption by the court of a fact which had not been established by testimony.\n \n This was certainly injurious to the defendant. To what extent it prejudiced his case it is impossible to say. It is enough that it may have done so.\u201d\n \n\n The statement of the court during the trial that \u201cPhillips is out. . . . 
What happened after Phillips is eliminated?\u201d; together with the court\u2019s charge exonerating Phillips and stating that he was an innocent man \u2014 who had successfully established an alibi and been vindicated in the community and was not\n \n even near\n \n (he was within half a block one hour before the murder, according to his own testimony) the scene of the crime at the time of its commission \u2014 assumed as established and true facts, facts which had not been proved and were, at best, important controversial questions which only the jury and not the judge could decide.\n \n\n Judgment reversed and a new trial granted.\n \n\n\n *\n \n\n Italics throughout, ours.\n \n"
13
+ },
14
+ {
15
+ "text": "\n\n Per Curiam.\n \n\n Defendant pled guilty to a reduced charge of larceny from a person, MCL 750.357; MSA 28.589. In exchange for the plea, the prosecutor dismissed the original unarmed robbery charge, MCL 750.530; MSA 28.798. The parties agreed to a sentence of three to ten years in prison.\n \n\n The trial judge sentenced defendant to a prison term of\n \n IV2\n \n to 10 years. He noted that defendant\u2019s record was not that bad and that he had a good relationship with his family. The prosecutor did not object to the sentence. Neither party raised the subject of the sentence agreement.\n \n\n About a month later, the prosecutor moved to vacate the sentence. He argued that it was a violation of the sentence agreement. The judge acknowledged that he had made a mistake. He vacated the original sentence and resentenced defendant to three to ten years. He denied defendant\u2019s subsequent motion to vacate the second sentence.\n \n\n On appeal defendant argues that the trial court did not have authority to resentence him. We agree.\n \n\n A trial court\u2019s authority to resentence a defendant is limited. It depends on whether the previously imposed sentence is invalid.\n \n People v\n \n Whalen, 412 Mich 166, 169; 312 NW2d 638 (1981). We are unable to find any authority which holds that a sentence which does not follow the sentence\n \n *193\n \n agreement is invalid. Even where there is a sentence agreement, the trial court is not bound by it and must still exercise discretion when imposing sentence.\n \n People v Killebrew,\n \n 416 Mich 189; 330 NW2d 834 (1982). Since defendant\u2019s original sentence was valid, the trial court did not have authority to resentence him.\n \n\n Accordingly, we vacate defendant\u2019s sentence and remand for reinstatement of his original sentence of\n \n IVi\n \n to 10 years in prison.\n \n"
16
+ },
17
+ {
18
+ "text": "\n\n SHEPARD, J.\n \n\n This is an appeal by plaintiff from a judgment for defendant in a personal injury action.\n \n\n Facts\n \n\n Sometime between 12 midnight and 2 a. m. of May 3, 1960, at the Apache Caf\u00e9, San Bernardino, California, an altercation occurred between plaintiff, Luis Perea Rivas, age 34, weight 215, a cook\u2019s helper, and defendant Arnulfo Ayala, age 70, weight 165, a retired Santa Fe railroad worker. While the broad outlines of what happened are not in dispute, the exact details of how the altercation started in the caf\u00e9 and who the aggressor was after they left the caf\u00e9 are in sharp conflict. Because where such conflict exists this court is bound by the evidence which supports the findings and judgment of the trial court, we shall so relate it.\n \n (Brewer\n \n v.\n \n Simpson,\n \n 53 Cal.2d 567, 583 [1-2] [2 Cal.Rptr. 609, 349 P.2d 289].)\n \n\n About one year previously, Luis had been a tenant in a house belonging to Ayala and Luis had been evicted. Shortly thereafter, Luis and his uncle \u201cChevo\u201d Rivas met Ayala returning home late at night in an intoxicated condition. The two Rivas attacked Ayala, knocked him to the ground and severely beat him. Some time before midnight of the night here in question, Ayala came to the caf\u00e9, sat on a stool at the bar about 5 feet from the door and next to Mary Bareta (a niece of Luis), ordering a Seven-Up for himself and a beer for Mary. Some time after midnight Luis came in and took\n \n *241\n \n a seat at the bar several stools away and next to a Bobby Silva. A short time later Luis came close to Ayala, started a discussion involving \" Chevo, \u2019 \u2019 and challenged Ayala to fight. Ayala refused. Luis kept repeating his challenge. The bartender ordered Luis to leave Ayala alone and finally ordered Luis to get out of the bar. Luis refused. Finally, Ayala left with Luis. 
As he walked out he saw a knife in Luis\u2019 hand.\n \n\n Outside, Luis and Ayala walked a few paces around the corner. Luis swung his knife at Ayala. Ayala had a small pocket-knife with a 2 or 2%-inch blade which he evidently opened as they were walking. When Luis struck, Ayala struck back two or three times and cut Luis on the face and abdomen. All this took place in a few seconds or a minute. Luis walked away bleeding. Ayala did not follow him. Luis was taken to the hospital by others, for medical attention. Ayala had never before been in a knife fight.\n \n\n Peculiarly, there were at least four other persons in the bar well known to both parties. But the only witness called by either party was a taxi driver who saw the parties walk out together, saw waving arms for a few seconds but could not see who struck what blow, if any, saw blood running from Luis and called the police. He contributed nothing to the question of who the aggressor was. Judgment was rendered for defendant. Plaintiff appeals.\n \n\n Self-Defense\n \n\n Plaintiff first contends that as a matter of law defendant\u2019s actions cannot be justified as self-defense, citing as authority\n \n People\n \n v.\n \n Hinshaw,\n \n 194 Cal. 1 [227 P. 156];\n \n Fraguglia\n \n v.\n \n Sala,\n \n 17 Cal.App.2d 738 [62 P.2d 783]. The quotation given by plaintiff from the\n \n Hinshaw\n \n case provides its own answer: \u201c \u2018... that self-defense is not available as a plea to a defendant who has sought a quarrel with the design to force a deadly issue and thus, through his fraud, contrivance, or fault, to create a real or apparent necessity for making a felonious assault. 
\u2019 \u201d\n \n\n The court found orally that plaintiff was the aggressor and also found in its written findings \"That at the time and place of the occurrence in question the defendant herein acted in proper and reasonable self defense and used no more force than was reasonably necessary to repel the assault committed upon him by the plaintiff.\u201d\n \n\n The evidence factually supports both the oral statement of the trial court and its written finding. The\n \n Fraguglia\n \n \n *242\n \n ease involved a fight between two rubbish truck workers. The judgment there was reversed because there was no evidence that defendant was the aggressor in the sense of the rule as set forth in the\n \n Minshaw\n \n ease.\n \n\n Furthermore, the trial court may well have believed that defendant finally concluded that plaintiff would attack him in any event and that he would be at a disadvantage in the close quarters of the bar. Some of defendant\u2019s answers are inconsistent with this conclusion, but when the whole testimony is read together, with the defendant\u2019s lack of fluency and English understanding in mind, it becomes a possible conclusion. Plaintiff\u2019s continued insistence on fighting, his refusal to heed the orders of the bartender to leave defendant alone and get out, the fact that an open blade in plaintiff\u2019s hand was seen by defendant as they started out of the caf\u00e9, the fact that defendant had been previously attacked and inexcusably beaten by plaintiff and \u201cChevo,\u201d the fact that defendant obviously misunderstood the import of several questions, the answers that he was not \u201cafraid\u201d but felt it necessary to protect himself from a knife attack, plus the fact that defendant repeatedly asked plaintiff to leave defendant alone and to go away, all put together lend credence and support to such a belief. 
If such was the belief of the trial judge, it would render involuntary the willingness of defendant to remove the scene of the impending fight from the close quarters of the bar. In any event the evidence provided ample support for the court\u2019s finding. We find the case of\n \n Ballew\n \n v.\n \n Davis,\n \n 76 Cal.App.2d 418 [173 P.2d 317], more nearly in point. There, a photographer was found by the court to have been threatening to strike defendant because defendant was obstructing the photographer\u2019s view. Defendant therein was held to be acting in self defense when he anticipated the photographer\u2019s blow.\n \n\n Mutual Consent to Combat\n \n\n Plaintiff now contends, for the first time, that this was voluntary mutual combat and that self-defense may not be used as a defense in a damage action in case of voluntary mutual combat. But we need not examine this theory. It was not the theory upon which the ease was pleaded or tried and was never suggested to the trial judge for his consideration at any time. Theories never suggested to the trial judge, and which neither he nor the opposing party had any opportunity to consider at the trial level, are not\n \n *243\n \n properly presented for the first time on appeal. (Cal. Rules of Court, rule 14\n \n *\n \n ;\n \n Richard\n \n v.\n \n Richard,\n \n 123 Cal.App.2d 900, 903 [7] [267 P.2d 867] ;\n \n Estate of Bialy,\n \n 185 Cal.App.2d 634, 638 [3] [8 Cal.Rptr.\n \n 663]; Estate of\n \n Sayegh, 118 Cal.App.2d 327, 332 [4] [257 P.2d 995].)\n \n\n The judgment is affirmed.\n \n\n Griffin, P. J., and Coughlin, J., concurred.\n \n\n\n *\n \n\n Formerly Rules on Appeal, rule 14.\n \n"
19
+ },
20
+ {
21
+ "text": "\n\n Lummus, J.\n \n\n The plaintiff was a tenant at will of the defendants in a tenement on premises in Lexington owned by the defendants and \u201coccupied\u201d by the plaintiff and the defendants. On October 21, 1948, the defendants obtained judgment against him for possession in an action of summary process, but the issuance of execution was stayed from time to time until sometime in March, 1949. St. 1948, c. 2. St. 1949, c. 87. The plaintiff remained in possession of the tenement. On December 25, 1948, he fell on ice on a common walk on the premises. The ice resulted\n \n *476\n \n from the act of the defendant Alvan N. Day in emptying on the walk water which then froze. The defendants were seasonably aware of the condition of the walk.\n \n\n The judge found that at the time of the fall the plaintiff was merely a tenant at sufferance to whom the defendants owed merely the duty of refraining from doing him wilful or wanton injury. He found for the defendants. The Appellate Division dismissed a report, and the plaintiff appealed to this court.\n \n\n At common law a person in the position of the plaintiff was merely a tenant at sufferance.\n \n Dennett\n \n v.\n \n Nesson,\n \n 244 Mass. 299.\n \n Margosian\n \n v.\n \n Markarian,\n \n 288 Mass. 197,\n \n Mescall\n \n v.\n \n Somerset Savings Bank,\n \n 305 Mass. 575, 577. \u201cA tenant at sufferance is a bare licensee to whom the landlord owes merely the duty not wantonly nor wilfully to injure him.\u201d\n \n Margosian\n \n v.\n \n Markarian,\n \n 288 Mass. 197, 199.\n \n Carney\n \n v.\n \n Conveyancers Title Ins. & Mortgage Co.\n \n 309 Mass. 197, 200.\n \n\n We think that the rights of the plaintiff were made no greater by the recent statutes providing for stays of execution in cases of summary process. Such statutes do not extend the tenancy at will. On the contrary, they declare that the \u201ctenancy has been terminated.\u201d G. L. (Ter. Ed.) c. 239, \u00a7 9. St. 1946, c. 43. St. 1947, c. 
78. St. 1948, c. 2. St. 1949, c. 87. In\n \n Dennett\n \n v.\n \n Nesson,\n \n 244 Mass. 299, where after judgment of possession a stay of execution was granted under St. 1920, c. 577, it was held that the tenancy at will had ended, that no new tenancy at will arose, and that there existed only a tenancy at sufferance. In the present case wanton or reckless conduct is neither alleged nor proved. We find no error in dealing with the plaintiff\u2019s requests for rulings.\n \n\n\n Order dismissing report affirmed.\n \n\n"
22
+ },
23
+ {
24
+ "text": "\n\n Williamson, J.\n \n\n On exceptions by defendant, Richard A. Chisholm, to the acceptance of a referee\u2019s report. Plaintiff\u2019s action to recover a real estate broker\u2019s commission was referred under rule of court with the right to except as to matters of law. The referee found for the plaintiff. The\n \n *207\n \n first objection sharply raises a jurisdictional question. The objection reads:\n \n\n \u201c1st. The Referee erred in ruling that the declaration could be regarded as having been amended, and further in ruling that the Plaintiff\u2019s action was not barred because there was no allegation in Plaintiff\u2019s pleadings that at the time of the transaction involved he was a duly licensed and qualified real estate broker under the Laws of Maine.\u201d\n \n\n The issue is whether the referee could properly find for the plaintiff in the absence of an allegation in the declaration that the plaintiff was a duly licensed real estate broker at the time the alleged cause of action arose under the provisions of\n \n R. S., Chap. 75, Sec. 7,\n \n relating to the Maine Real Estate Commission, which reads so far as we are here concerned as follows:\n \n\n \u201cNo person, partnership, or corporation engaged in the business or acting in the capacity of a real estate broker or a real estate salesman within this state shall bring or maintain any action in the courts of this state for the collection of compensation for any services performed as a real estate broker or real estate salesman without alleging and proving that such person, partnership, or corporation was a duly licensed real estate broker or real estate salesman at the time the alleged cause of action arose.\u201d\n \n\n There is no dispute about the facts on the point at issue. 
The bill of exceptions seen and agreed to by the plaintiff reads as follows:\n \n\n \u201cPlaintiff\u2019s writ and declaration failed to allege that Plaintiff was a duly licensed and qualified real estate broker under the Laws of Maine. The case was tried without any objection being raised to such defect or such insufficiency. The Referee found and ruled that since amendments could have been allowed, he will regard the writ and declaration as though they had been properly amended.\u201d\n \n\n \n *208\n \n The position of the referee appears from his report:\n \n\n \u201cThe action is brought in assumpsit on an account annexed and the general money counts, and issue is joined on a plea of the general issue without brief statement. The case was tried without objection being raised as to any defects in or the sufficiency of the writ and declaration. If objection had been made, any apparent defects in pleading were amendable.\n \n Jones\n \n v.\n \n Briggs,\n \n 125 Me. 265;\n \n Mansfield\n \n v.\n \n Goodhue,\n \n 53 A. (2nd) 264. Amendments could have been allowed as provided in R. S., Chap.. 100, Sec. 95. See\n \n Benson\n \n v.\n \n Newfield,\n \n 136 Me. 23, 33.\u201d\n \n\n On direct examination the plaintiff testified:\n \n\n \u201cQ. And are you a licensed real estate broker?\n \n\n A. Yes.\u201d\n \n\n On cross examination the critical fact of a license at the time the alleged cause of action arose in March 1949 was brought out from the plaintiff as follows:\n \n\n \u201cQ. How long have you been a real estate broker ?\n \n\n A. Well, now, of that I am not absolutely positive but I think it is four years I have had a broker\u2019s license and either one or two years as a salesman of real estate.\u201d\n \n\n The argument of the defendant that \u201cthere was no understanding that plaintiff was a licensed broker\u201d or in other words that the case was not tried on the theory that the plaintiff had the required license is without merit. 
Proof of the fact was made without objection and the fact served no useful purpose except to establish a statutory requirement. It is the allegation, not the proof, which is defective.\n \n\n This is the third case to come before us in which a real estate broker has failed to make the allegation required by statute.\n \n Gerstian\n \n v.\n \n Tibbetts,\n \n 142 Me. 215, 49 A. (2nd) 227, arose upon exceptions to a nonsuit granted upon the\n \n *209\n \n merits. The court overruled the exceptions both on the merits and for lack of the allegation. The court said, page 220:\n \n\n \u201cIf the fact that the plaintiff had a license is considered proved, it is not alleged. The very jurisdiction of the Court depends upon\n \n both\n \n allegation and proof.\u201d\n \n\n In\n \n Mansfield\n \n v.\n \n Goodhue,\n \n 142 Me. 380, 53 A. (2nd) 264, upon the sustaining of a demurrer, the defect in the allegation was cured by amendment. A second demurrer by the defendant on the ground that the defect was not amendable was overruled. Our court in sustaining the decision said on page 382:\n \n\n \u201cAssuming that such allegation does involve the right of the court to consider the case, yet there is no reason why the failure to allege such fact may not be cured by amendment. It may be true that a court without jurisdiction has no authority to allow an amendment. Yet if a court has jurisdiction of the subject matter, it may in such a case as this allow an amendment to perfect the jurisdiction on the record.\u201d\n \n\n The\n \n Gerstian\n \n and\n \n Mansfield\n \n cases stand for the principle that the allegation required by statute must appear of record. There has been no curative amendment in the case at bar. The pleadings are fatally defective, and hence the exceptions must be sustained.\n \n\n It is urged that the pleadings are to be regarded as though properly amended. No objection was raised to the defect in or sufficiency of the pleadings at the trial before the referee. 
An amendment could have been, and no doubt would have been allowed, had the procedures required by\n \n R. S., Chap. 100, Sec. 95,\n \n relating to amendments in referred cases, been followed. See\n \n Ford\n \n v.\n \n Whitehead,\n \n 137 Me. 125, 15 A. (2nd) 857, decided shortly before the statute was first enacted in 1941.\n \n\n \n *210\n \n Our attention is directed to\n \n Jones\n \n v.\n \n Briggs,\n \n 125 Me. 265, 132 A. 817, and\n \n Benson\n \n v.\n \n Newfield,\n \n 136 Me. 23 at 33, 1 A. (2nd) 227, which illustrate the cure of mere defects in pleadings by verdict and the treatment of pleadings on- an \u201cas if amended\u201d basis when variance appears between allegation and proof. See also\n \n Clapp\n \n v.\n \n Cumberland County Power and Light Co., 121\n \n Me. 356 at 359, 117 A. 307;\n \n Cyr\n \n v.\n \n Landry,\n \n 114 Me. 188 at 196, 95 A. 883;\n \n Wyman\n \n v.\n \n Shoe Finding Co.,\n \n 106 Me. 263, 76 A. 483.\n \n\n Such cases, however, do not touch upon the situation when jurisdiction is at stake. The court did not consider the \u201cas if amended\u201d rule applicable in the\n \n Gerstian\n \n case,\n \n supra,\n \n although no objection to the pleadings appears to have been raised by counsel at any stage of the case. In the\n \n Jones\n \n case,\n \n supra,\n \n the court said on page 266:\n \n\n \u201cAn action at law is not to be dismissed for mere defects in pleading that are amendable or may be cured by verdict if it appears that the court has jurisdiction and the plaintiff has stated a good cause of action.\u201d\n \n\n In our view the allegation required by the statute must be made of record in fact. The statute does not read that on proof of the license in a case fairly tried and without surprise the allegation may be considered as if in fact made upon the record. 
Such a construction would fail to give effect to the meaning and the intent of the statute.\n \n\n We may regret that the decision of the referee cannot be considered on the merits, but must be set aside for lack of a few appropriate words in the pleadings. The law here applicable, however, is found in the Act of Legislature which defined and limited the plaintiff\u2019s right to bring and maintain his action. Neither the parties nor the court can waive its provisions. It is not necessary that we pass upon the remaining objections.\n \n\n The entry will be:\n \n\n\n Exceptions sustained.\n \n\n"
25
+ },
26
+ {
27
+ "text": "\n\n Maher, J.\n \n\n Following a bench trial, defendant was convicted of possession of more than 50 grams\n \n *350\n \n but less than 225\n \n grams of a\n \n mixture containing a controlled substance (cocaine), with intent to deliver, MCL 333.7401(2)(a)(iii); MSA 14.15(7401)(2)(a) (iii), and operating a vehicle while under the influence of intoxicating liquor (ouil), MCL 257.625(1); MSA 9.2325(1). Defendant received ninety days imprisonment for the ouil conviction and the statutory minimum ten years imprisonment for the cocaine-related conviction. In this appeal as of right defendant asserts the trial court erroneously denied his motion to suppress evidence of cocaine seized during an inventory search of his vehicle. We affirm defendant\u2019s conviction for ouil, but reverse the cocaine-related conviction.\n \n\n i\n \n\n The primary issue in this case concerns the admissibility of evidence obtained during an inventory search of defendant\u2019s automobile. After defendant was arrested for ouil, the police impounded his vehicle and conducted a routine inventory search of its contents. During the search, the police discovered a package of cocaine beneath the driver\u2019s seat, plus seven more bags of cocaine contained in a golf bag in the trunk. The total amount of cocaine discovered was more than 50 grams but less than 225 grams.\n \n\n A suppression hearing was held on March 25, 1988. The testimony at the hearing established that on July 8, 1987, between 1:00 and 1:30 a.m., defendant and a friend, John Albert, left an Ann Arbor bar in defendant\u2019s vehicle, with defendant driving. Thomas Tanner, an Ann Arbor police officer, testified that he stopped defendant after observing the vehicle cross the center line of Packard Road. After noticing the police car, defendant pulled off onto a residential street where he legally\n \n *351\n \n parked his car along the curb. 
Tanner then performed some field sobriety tests on defendant, after which he made his decision to arrest defendant for ouil. Albert then asked Officer Tanner if he could take custody of the car. Although Tanner did not perform any sobriety tests on Albert, Tanner would not release the car to Albert because defendant indicated Albert had also been drinking and because Tanner noticed that Albert appeared unsteady, smelled of alcohol and had slurred speech. Albert was given the option of either walking away or having a cab called. Albert then went back and conferred with defendant in the police car. After Albert told defendant that the police officer would not let him take the car, defendant asked Albert to arrange for defendant\u2019s wife or attorney to pick up the car. When Albert spoke to Officer Tanner about this request the officer told him the car was being impounded, and it was now their car. Albert eventually left on foot. Shortly thereafter, the police conducted their inventory search of the car and the cocaine was discovered.\n \n\n The sole reason given at the suppression hearing for the impoundment of defendant\u2019s vehicle was that impoundment was authorized under Ann Arbor Ordinances, 10:139, which provided:\n \n\n 1. A police officer may immediately remove and impound a vehicle in any of the following situations.\n \n\n i. The driver of a vehicle is taken into custody by the Police Department and such vehicle would thereby be left unattended.\n \n\n Once impounded, standard departmental policy required a police officer to thoroughly search the vehicle to determine what, if any, articles of value were present.\n \n\n \n *352\n \n Following the suppression hearing, the trial court found that the search of defendant\u2019s vehicle was valid and therefore ruled the cocaine was admissible. 
On May 31, 1988, the day scheduled for trial, defendant waived his right to a jury trial and it was agreed that the record of the suppression hearing plus certain other stipulated facts would serve as the basis for the trial record. Thereafter, the court again upheld the validity of the inventory search and found defendant guilty of possession of cocaine with intent to deliver and OUIL.\n \n\n ii\n \n\n Defendant argues on appeal that the trial court erred in failing to suppress the cocaine because the impoundment of his vehicle violated his Fourth Amendment rights as an unreasonable search and seizure. A trial court\u2019s ruling at a suppression hearing is reviewed under the clearly erroneous standard. The court\u2019s decision will be affirmed unless, upon a review of the record, this Court is left with a definite and firm conviction that a mistake was made.\n \n People v Burrell,\n \n 417 Mich 439, 448; 339 NW2d 403 (1983). Upon our review of the record, we agree the trial court clearly erred in denying defendant\u2019s motion to suppress the cocaine.\n \n\n The Fourth Amendment to the United States Constitution, made applicable to the states by way of the Fourteenth Amendment,\n \n Mapp v Ohio,\n \n 367 US 643; 81 S Ct 1684; 6 L Ed 2d 1081 (1961), guarantees \"[t]he right of the people to be secure in their persons, houses, papers, and effects, against unreasonable searches and seizures.\u201d US Const, Am IV. A search and seizure without a warrant is unreasonable per se and violates the\n \n *353\n \n Fourth Amendment of the United States Constitution unless shown to be within one of the exceptions to the rule.\n \n People v Reed,\n \n 393 Mich 342, 362; 224 NW2d 867 (1975). 
The burden is always on the state to show an exception exists.\n \n Id.; Coolidge v New Hampshire,\n \n 403 US 443; 91 S Ct 2022; 29 L Ed 2d 564 (1971).\n \n\n The leading case concerning the validity of inventory searches of impounded automobiles is\n \n South Dakota v Opperman,\n \n 428 US 364; 96 S Ct 3092; 49 L Ed 2d 1000 (1976). In that case, the United States Supreme Court upheld an inventory search of a\n \n lawfully impounded\n \n vehicle. The search in that case was prompted by the presence, in plain view, of a number of valuables inside the car. The Court looked at all the facts and circumstances in the case and, after noting that \"police intrusions into automobiles\n \n impounded or otherwise in lawful police custody\u201d\n \n have been consistently sustained \"where the process is aimed at securing or protecting the car and its contents,\u201d\n \n id.\n \n at 373, determined that an inventory search performed as a caretaking function pursuant to standard police procedure did not constitute an unreasonable search and seizure under the Fourth Amendment.\n \n Id.\n \n at 375-376 (emphasis added).\n \n\n In\n \n Opperman,\n \n the vehicle had been impounded for violation of a parking ordinance and the validity of the initial impoundment was not at issue. However,\n \n Opperman\n \n did recognize the authority of the police to remove and impound vehicles in the interest of public safety and as part of a \"community caretaking function.\u201d\n \n Id.\n \n at 368. Such authority, however, is not absolute. According to\n \n Opperman,\n \n the validity of a police intrusion must be examined by analyzing the reasonableness of the seizure under all the circumstances and each case must be decided on its own facts.\n \n Id.\n \n at 372-373.\n \n\n \n *354\n \n\n Opperman\n \n recognized various situations in which the impoundment and removal of a vehicle will generally be upheld. 
Such situations include instances where removal is required to permit the uninterrupted flow of traffic or to preserve evidence, instances where the vehicle is disabled or damaged, and instances where a parking ordinance is violated, thereby jeopardizing both public safety and efficient movement of vehicular traffic.\n \n Id.\n \n at 368-369. The\n \n Opperman\n \n Court recognized that, once a vehicle is impounded, an inventory search pursuant to standard police procedure will generally be upheld where its purpose is to (1) protect the owner\u2019s property while the vehicle remains in police custody, (2) protect the police against claims or disputes over lost or stolen property, and (3) protect the police from potential danger.\n \n Id.\n \n at 369.\n \n\n Although defendant raises several issues concerning both the initial impoundment and the subsequent inventory search, we agree the dispositive issue in this case involves the legitimacy of the initial impoundment. In\n \n People v Krezen,\n \n 427 Mich 681, 685-686; 397 NW2d 803 (1986), our Supreme Court applied the\n \n Opperman\n \n rationale in deciding just such a case. In that case, the defendant was arrested at an airport after picking up a package containing cocaine. After the arrest, the authorities impounded defendant\u2019s automobile, which was lawfully parked in an air freight parking lot. A brown vial containing cocaine residue and other paraphernalia were discovered in defendant\u2019s purse, which was lying visibly on the front seat of the car. In a plurality opinion, the Court upheld the admission of the cocaine found in the purse.\n \n\n Writing for the majority, Justice Boyle (with Justices Riley and Brickley concurring) followed\n \n *355\n \n\n Opperman, supra,\n \n and indicated that the reasonableness of the impoundment was to be determined from the particular facts of the case.\n \n Id.\n \n at 686. 
In her majority opinion, Justice Boyle noted that the possibility of theft or vandalism has been recognized as a valid reason for impounding a car upon the arrest of the driver,\n \n id.\n \n at 688, and ultimately upheld the impoundment as being a reasonable caretaking function instituted according to standard departmental policy.\n \n Id.\n \n at 686-689.\n \n\n In a dissenting opinion, however, Justice Levin (with Justices Cavanagh and Archer concurring) did not believe that an impoundment was justified merely to avoid the possibility of theft. He agreed with this Court\u2019s statement that the possibility of claims against the police for loss or damages does not compare in importance with the preservation of basic constitutional guarantees.\n \n 1\n \n\n Id.\n \n at 703-705.\n \n\n In the swing vote, Justice Williams likewise indicated he was not persuaded that the impoundment was reasonable. However, he agreed the police could not have reasonably left the purse in plain view and rationalized that, because returning the purse to defendant would have inevitably led to the discovery of the cocaine, its admission should be upheld.\n \n Id.\n \n at 697-698.\n \n\n In applying the principles of\n \n Opperman\n \n and\n \n Krezen,\n \n we begin our analysis of the case at hand with the initial recognition that the burden was on the police to establish the reasonableness of the impoundment when considering the circumstances of the case. Having carefully reviewed the record, we do not believe this burden was met.\n \n\n The evidence in this case established that the vehicle was lawfully parked along the curb of a\n \n *356\n \n residential street. There was no evidence that the vehicle constituted a safety hazard or otherwise impeded the flow of traffic. Furthermore, there was no indication that it was necessary to impound the vehicle to protect the police from potential danger or that the impoundment was otherwise necessary as an incident to defendant\u2019s arrest. 
Finally, there was no evidence that any items of value were visibly present in the car.\n \n\n The trial court, after indicating it had reviewed\n \n Krezen,\n \n stated the following as its basis for denying defendant\u2019s motion to suppress:\n \n\n We have had so many suits in this county against police officers and other persons where people claim they lost things in the vehicle, the officer stole something from the vehicle or things are missing. So this Court finds that the officer did act reasonably when he decided that he wasn\u2019t going to leave the vehicle in the street and he was going to impound it and he felt that he should inventory what was in it so that somebody couldn\u2019t claim that he had stolen something.\n \n\n I am not suggesting that the Defendant would have claimed this. But the officer doesn\u2019t know that, the officer has to act with what he has.\n \n\n It is clear that the trial court relied on the reasoning of\n \n Krezen\n \n and determined that concern over potential liability for missing or stolen items was sufficient justification for the impoundment. However, the basis for the trial court\u2019s conclusion is not supported by the record. The police officer did not give as a reason for impoundment concern for either the safety of the vehicle or its contents, nor was there any evidence presented establishing the possibility of theft or vandalism as a reasonable concern. 
Instead, the only reason given for the impoundment was that it was authorized by local\n \n *357\n \n ordinance in a situation where a driver is taken into custody and the vehicle would thereby be left unattended.\n \n\n Although\n \n Krezen\n \n recognized the lawful impoundment of a vehicle as a reasonable caretaking function, we do not believe that\n \n Krezen\n \n stands for the proposition that an impoundment will be upheld as a reasonable caretaking function in the absence of evidence creating a reasonable concern for the actual safety of the vehicle or its contents. Indeed, an examination of\n \n Krezen\n \n clearly reveals that the decisive factor in that case was the presence of defendant\u2019s purse in plain view on the front seat of the car. In her majority opinion, Justice Boyle was very careful to indicate that the decision upholding the reasonableness of the impoundment was controlled by the particular facts of the case.\n \n Krezen, supra\n \n at 686 and 697. She expressly indicated that a departmental policy requiring impoundment upon arrest, regardless of the surrounding factual circumstances, may well involve situations where an impoundment would be violative of the Fourth Amendment as an unreasonable seizure.\n \n Id.\n \n at 685-686. Justice Levin agreed with this proposition in his dissent, stating, \"[cjourts generally have concluded that to justify impoundment something more must be shown than simply that the vehicle would otherwise be left unattended.\u201d\n \n Id.\n \n at 708. The \"something more\u201d that existed in\n \n Krezen\n \n was the presence of a purse in plain view on the front seat of the car.\n \n\n In our view, the impoundment of defendant\u2019s vehicle would have been found unreasonable under\n \n Krezen.\n \n We reach this conclusion based on the fact that while\n \n Krezen\n \n contained specific facts establishing a reasonable concern for the safety of the vehicle\u2019s contents, there were no such facts in this case. 
Further, in spite of\n \n Krezen\u2019s\n \n more com\n \n *358\n \n pelling facts for upholding the validity of the impoundment, only three of the seven justices were able to find the seizure to be reasonable.\n \n\n Because the only reason given for the impoundment in this case was the existence of the local ordinance, and because there were no other facts presented that would otherwise justify the impoundment of the vehicle as being reasonable under the circumstances, we find that the police did not sustain their burden in establishing the reasonableness of the seizure. Further, we believe that, by relying upon the possibility of theft or vandalism as its basis for finding the impoundment to be proper, the trial court clearly erred when this concern was neither expressed at the hearing nor supported by the record.\n \n\n Inasmuch as the cocaine would not have been discovered absent the impoundment, its admission should have been suppressed. Moreover, because defendant could not have been convicted for possession of cocaine with intent to deliver had evidence of the cocaine not been admitted, this conviction is reversed and the charge shall be dismissed. With respect to defendant\u2019s conviction for ouil, however, the improper admission of the cocaine was harmless error. Therefore, the ouil conviction is affirmed.\n \n\n hi\n \n\n Finally, defendant raises various sentencing issues.\n \n\n Defendant initially claims the trial court erred by failing to give retroactive effect to the recent amendments to MCL 333.7401; MSA 14.15(7401). These amendments reduce the statutory minimum sentence for defendant\u2019s cocaine conviction to five years and also allow the sentencing court to de\n \n *359\n \n part from the minimum sentence requirement where there exist substantial and compelling reasons to do so. However, in light of our reversal of defendant\u2019s conviction under this statute, this issue has been rendered moot. 
In any event, we agree with the prior decisions of this Court holding that the amendments to the statute are not retroactive.\n \n People v Becoats,\n \n 181 Mich App 722; 449 NW2d 687 (1989);\n \n People v Jackson,\n \n 179 Mich App 344; 445 NW2d 513 (1989).\n \n\n Lastly, we find no merit to defendant\u2019s argument that he was deprived of his right of allocution at sentencing. The court twice offered him an opportunity to speak and he declined each time. Additionally, the record does not support defendant\u2019s contention that his opportunity to allocute was rendered meaningless because the court had already determined the sentence it was going to impose during an in-chambers conference. See\n \n People v McNeal,\n \n 150 Mich App 85; 389 NW2d 708 (1985). Therefore, defendant\u2019s sentence for the ouil conviction is affirmed.\n \n\n Affirmed in part and reversed in part.\n \n\n\n 1\n \n\n See\n \n People v Krezen,\n \n 143 Mich App 34, 40; 371 NW2d 882 (1985).\n \n"
28
+ },
29
+ {
30
+ "text": "\nOPINION\nNORTON, Judge.\nAppellant Theresa Ann McColley Schmitz (\u201cMcColley\u201d) seeks review of the trial court\u2019s denial of her motion for a new trial in a paternity action. After a jury returned a verdict in favor of respondent Mark Stransky a\/k\/a Mark Rand (\u201cStran-sky\u201d), McColley moved for a new trial alleging as error three evidentiary rulings.\nRespondent Stransky filed a notice of review seeking review of orders which require him to reimburse the county for attorney fees expended on his behalf and which refuse to appoint counsel to represent him on this appeal. Stransky now argues that Minn.Stat. \u00a7 257.69 is unconstitutional because it violates equal protection by conferring privileges upon custodial parents during paternity actions and that the statute violates the Civil Rights Act of 1964 and the Minnesota Human Rights Act by favoring custodial parents, who are predominately female. Stransky argues that the trial court erred both in requiring him to reimburse Rice County for attorney fees incurred after he was employed and in failing to appoint paid counsel to represent him on this appeal.\nBecause Stransky challenges the constitutionality of statutes, the State of Minnesota intervenes to defend those statutes. We reverse.\nFACTS\nMcColley alleges that she and Stransky began a sexual relationship in approximately March of 1981. She lived with Stransky in his home during August and September of 1981, and then moved out. Stransky and McColley saw less of one another after she moved out. In early November of 1981, McColley\u2019s physician performed a pregnancy test whieh was returned negative. In early December 1981, a second test came back positive. A later ultrasound examination placed the date of conception at approximately October 15th. Expert testimony later placed conception between October 20 and November 1, 1981. The full-term child was born July 7, 1982. 
This paternity action was commenced in May of 1984.\nMcColley had undergone blood testing in an unrelated paternity action in June of 1982 and she and her new child gave samples for this action in 1984. Blood testing performed on Stransky resulted in a 98.082% likelihood of paternity. McColley had intercourse with two other men during *458the probable period of conception, once on October 29, 1981 and once on October 31, 1981. She claims that her sexual relationship with Stransky was continuing at a rate of once or twice a week until October 22, 1981, which she says is the last time that she and Stransky had intercourse. Stran-sky denies that he ever had a sexual relationship with McColley. Blood tests performed on the other two men exclude both as potential fathers. McColley insists that these three men were the only men with whom she had sexual intercourse during the likely conception time.\nAt the trial, the main issue for the jury was whether to believe McColley and the blood test or Stransky. McColley sought to introduce Exhibit 9, the child\u2019s baby book, in which she had recorded visits and gifts to the child from Stransky\u2019s mother and sister. She also sought to introduce Exhibit 10, a photograph of the new baby sleeping under a crocheted afghan presented by Stransky\u2019s mother. The trial court excluded Exhibits 9 and 10 on the grounds that they were not relevant. McColley challenges the exclusion.\nIn support of her allegation that she had been involved in a sexual relationship with Stransky, McColley testified that she had seen a tattoo on his abdomen below the belt line. Stransky made an offer of proof, to pull down his pants, before the jury. The jury was then removed and Stransky, his attorney, McColley\u2019s attorney and the judge retired to chambers. Upon their return, the parties stipulated that Stransky did not presently have a tattoo in that location. 
McColley challenges the offer of proof as an admission of demonstrative evidence which does not tend to support conditions existing at the time relevant to the proceeding.\nMcColley sought to impeach Stransky\u2019s testimony through evidence of two convictions, one for aggravated robbery and theft in 1979 and another for burglary in 1982. Stransky was incarcerated after each conviction. The trial court excluded this evidence as prejudicial and stale; McColley challenges that ruling.\nThis action was brought by the Rice County Attorney\u2019s office on McColley\u2019s behalf. Stransky was unemployed when the action commenced. Because he was indigent, a court-appointed and paid attorney represented him. Stransky later became employed and the trial court determined that from September 1, 1986 he was responsible to reimburse the county for the attorney fees it incurred on his behalf. Stransky was also ordered to pay temporary child support in the amount of $115 per month, pending resolution. Those funds were held in escrow by the County. When the jury returned a verdict in favor of Stransky, the court ordered the es-crowed support refunded, minus the amount owed to reimburse attorney fees and costs. Stransky argues that it is unconstitutional for McColley to receive free legal representation while he is required to pay for his advocate.\nISSUES\n1. Did the trial court err in refusing to admit Exhibits 9 and 10?\n2. Did the trial court err in permitting the offer of proof to be made before the jury?\n3. Did the trial court err in refusing to permit evidence of prior convictions for the purpose of impeachment?\n4. Does Minn.Stat. \u00a7 257.69 unconstitutionally deny noncustodial parents equal protection of laws?\n5. Does Minn.Stat. \u00a7 257.69 violate Minn.Stat. \u00a7 363.03, subd. 4 and the Civil Rights Act of 1964?\n6. Did the trial court err in requiring Stransky to reimburse the county his attorney fees?\n7. 
Did the trial court err in not appointing counsel to represent Stransky on appeal?\nANALYSIS\nI.\nMcColley challenges the trial court\u2019s refusal to admit Exhibits 9 and 10. The trial court\u2019s ruling on evidentiary mat*459ters must be sustained in the absence of a clear abuse of discretion. State v. Brouillette, 286 N.W.2d 702, 707 (Minn.1979). The trial court excluded Exhibits 9 and 10 on the grounds that gifts from Stransky\u2019s mother would not be probative of whether Stransky is the father of the child. The trial court went on to state that even if the evidence were probative, it was too prejudicial and would be excluded under Minn.R. Evid. 403.\nThe trial court reasoned that the evidence was irrelevant because it could tend to prove only what Stransky\u2019s mother believed or wished and could not tend to prove whether her son was in fact the father. The trial court was well within its discretion in excluding this evidence.\nII.\nStransky made an offer of proof that he did not have a tattoo which McColley had described and alleged he had. Her allegation was apparently intended to corroborate her assertion that she had been intimate with Stransky. The offer was made in response to McColley\u2019s assertion. McColley argues that the trial court erred in denying her motion for a new trial which was based upon the offer of proof having been made in front of the jury.\nOffers of proof are to be made outside the hearing of the jury. Minn.R. Evid. 103(c). However, McColley testified that she had seen a particular tattoo on Stransky in 1981. Stransky testified that he did not have such a tattoo, that he had never had such a tattoo and that he had never had a tattoo removed. The record discloses that as soon as Stransky\u2019s attorney offered to have Stransky remove his pants, McColley\u2019s attorney objected. 
The jury was removed; the parties retired to chambers, Stransky was examined, and the parties returned to stipulate before the jury that Stransky did not presently have a tattoo. McColley objects to the offer of proof (that Stransky would pull down his pants in court) on the basis that it is inadmissible to show whether he had a tattoo in 1981. Because McColley first alleged the tattoo had existed, and then stipulated that no tattoo presently existed, the trial court did not abuse its discretion in making this evidentiary ruling.\nIII.\nMcColley sought to impeach Stran-sky\u2019s testimony by introducing evidence of Stransky\u2019s prior criminal convictions. As he did in the trial court, Stransky attempts to minimize the importance of credibility in this case by instead criticizing the validity of the blood tests which show he is very probably the father. Although no transcript of the video deposition was provided, the expert through whom those tests were introduced, Dr. Polesky, gave uncontradict-ed testimony indicating that the blood tests were not damaged or invalid in any way. Stransky\u2019s allegations that the blood was damaged in transit, or that McColley\u2019s sample was corrupted with genetic material from her unborn child flatly contradict the whole of the expert testimony in the record.\nDr. Polesky explained that the blood samples are placed in test tubes containing preservatives and are inspected before testing. He testified that no damage to these particular samples was observed. He also testified that a child younger than six months may carry some of its mother\u2019s genetic markers; not that a pregnant mother\u2019s blood contains markers from her unborn child. In mischaracterizing the evidence, Stransky seeks to avoid that the crux of this case is in the question of his credibility. 
Because the blood tests were not invalid, the central issue in this case is not simply paternity; the focal point becomes the credibility of the witness who denies it.\nMcColley sought to introduce evidence of two prior crimes: Stransky pleaded guilty to simple robbery on April 25, 1979 and to burglary on October 13, 1982. The trial court granted Stransky\u2019s motion in limine, relying on Minn.R.Evid. 609 and State v. Jones, 271 N.W.2d 534 (Minn.1978). The trial court held that the probative value of the evidence did not outweigh its prejudicial effect and that the convictions were stale. With regard to staleness, *460the applicable rule provides in pertinent part:\n(b) Time Limit. Evidence of a conviction under this rule is not admissible if a period of more than ten years has elapsed since the date of the conviction or of the release of the witness from the confinement imposed for that conviction, whichever is the later date, unless the court determines, in the interests of justice, that the probative value of the conviction supported by specific facts and circumstances substantially outweighs its prejudicial effect. * * *\nMinn.R.Evid. 609 (emphasis added).\nThe summons and complaint in this action were served in May of 1984, and trial began on April 18, 1989. Stransky\u2019s first conviction was obtained on April 25, 1979 and Stransky was sentenced to one year in jail. The evidence does not disclose the actual release date. The second conviction on October 13, 1982 resulted in a sentence of eighteen months, with credit for time served. Again, the record does not reveal his actual release date. Because the rule counts time from the date of conviction or the date of release, whichever is the later date, the trial court erred in counting instead from the date of the convictions, which was the earlier date. The convictions both, therefore, are admissible under Minn.R.Evid. 609(b).\nThe error is compounded by the trial court\u2019s analysis of the Jones factors. 
They are:\n(1) the impeachment value of the prior crime, (2) the date of the conviction and the defendant\u2019s subsequent history, (3) the similarity of the past crime with the charged crime (the greater the similarity, the greater the reason for not permitting use of the prior crime to impeach), (4) the importance of defendant\u2019s testimony, and (5) the centrality of the credibility issue.\nState v. Jones, 271 N.W.2d at 538. In Jones, a criminal case, the supreme court noted that the rules of evidence were not in effect at the time of Jones\u2019 trial. In the civil case here, the rules of evidence apply and require admission of the impeachment evidence. The applicable rule provides in pertinent part:\n(a) General Rule. For the purpose of attacking the credibility of a witness, evidence that he has been convicted of a crime shall be admitted if * * * the crime (1) was punishable by * * * imprisonment in excess of one year * * * and the court determines that the probative value * * * outweighs its prejudicial effect, or (2) involved dishonesty or false statement, regardless of the punishment.\nMinn.R.Evid. 609 (emphasis added). This rule contains a bright line test which requires admission if the crimes involved dishonesty, without regard to the weighing test of prejudice against probative value contained in part (a)(1).\nAlthough the trial court erred in applying the Jones factors to weigh prejudice against probity, it found correctly that the convictions were for crimes of dishonesty. See State v. Stanifer, 382 N.W.2d 213, 218 (Minn.Ct.App.1986) (robbery is a crime of dishonesty under rule 609(a)). The trial court erred in determining that although Stransky\u2019s crimes involved dishonesty, the prejudicial effect outweighed the probative value of admitting this evidence. Because the rule requires admission of prior crimes evidence if the crime involves dishonesty, the trial court abused its discretion. 
The convictions are admissible to impeach Stransky\u2019s credibility without regard to any prejudicial effect. Minn.R.Evid. 609(a)(2). The crimes were not stale. Minn.R.Evid. 609(b). Therefore, the trial court\u2019s remaining analysis is unnecessary and erroneous.\nStransky now argues that a recent proposed amendment to Minn.R.Evid. 609 demonstrates that his convictions should be inadmissible against him. He is mistaken. Prior crimes evidence permits the jury to see the \u201cwhole person.\u201d State v. Heidelberger, 353 N.W.2d 582, 589 (Minn.Ct.App.1984), pet. for rev. denied (Minn. Sept. 12, 1984) (citing Brouillette, 286 N.W.2d at 707). Because the testimony of the two key witnesses was diametrically opposed, their credibility was critical when the jury was asked to choose. McColley was enti-*461tied to have the jury informed about the background of the person whose testimony it was asked to believe. The exclusion of this evidence on the crucial question of Stransky\u2019s credibility is an error neither insignificant nor harmless. For this reason, we disagree with the dissent.\nIV.\nStransky questions the constitutionality of Minn.Stat. \u00a7 257.69 (1988), which permits governmental legal representation of a custodial parent in paternity proceedings. He contends this statute irrationally favors custodial parents and denies equal protection to others.\nStatutes benefit from a presumption of constitutionality. Minn.Stat. \u00a7 645.17(3) (1988). Stransky must demonstrate compelling evidence in order to rebut this presumption. Equal protection requires that persons similarly situated be treated alike. City of Cleburn v. Cleburn Living Center, 473 U.S. 432, 439, 105 S.Ct. 3249, 3253, 87 L.Ed.2d 313 (1985). Different treatment passes constitutional muster where the distinction in question is rationally based upon a legitimate governmental purpose. See Dandridge v. Williams, 397 U.S. 471, 485, 90 S.Ct. 1153, 1161, 25 L.Ed.2d 491 (1970). 
The criteria against which the statute in question must be measured are whether 1) the classification uniformly applies to those who are similarly situated, 2) the distinctions are genuine and provide a reasonable basis to justify different legislation, and 3) the classification is relevant to the purpose of the law. Schwartz v. Talmo, 295 Minn. 356, 362, 205 N.W.2d 318, 322 (1973), appeal dismissed 414 U.S. 803, 94 S.Ct. 130, 38 L.Ed.2d 39 (1974).\nThe purpose of a paternity proceeding under Minn.Stat. \u00a7\u00a7 257.51-.74 is to establish parentage and provide support for the child. Because the custodial parent has already acknowledged parenthood and is providing support, the distinction between custodial parents and others is genuine and provides a reasonable basis to justify different treatment. This distinction between acknowledged parents who are providing support and alleged parents who are not providing support is relevant to the purpose of the law. Stransky argues that all parties to a paternity action are similarly situated and should be entitled to free legal services in the interest of determining parentage and support issues. We do not agree that an alleged father who denies paternity is similarly situated to a custodial parent who is providing support. Stran-sky\u2019s argument that the government is obligated to provide him a legal representative because it has provided one to McCol-ley is meritless. Stransky has failed to demonstrate that the statute unconstitutionally violates his equal protection of laws.\nV.\nStransky also alleges that Minn. Stat. \u00a7 257.69 violates the Minnesota Human Rights Act, specifically Minn.Stat. \u00a7 363.03, subd. 4, and the Civil Rights Act of 1964. Stransky\u2019s argument appears directed at the fact that many of the custodial parents who may receive benefits are women.\nMinn.Stat. \u00a7 257.69 benefits custodial parents, regardless of gender. 
Stransky is ineligible for court-appointed and paid legal representation not because he is male, but because he is not a custodial parent.\nVI.\nStransky alleges the trial court erred in ordering him to reimburse Rice County for attorney fees incurred by it since September 1, 1986. When this action began in 1984, Stransky had been recently released from prison and was unemployed. Minn.Stat. \u00a7 257.69, the constitutionality of which we have just upheld against Stran-sky\u2019s challenge, governs appointment of counsel in paternity actions. The statute states in pertinent part: \u201cThe court shall appoint counsel for a party who is unable to pay timely for counsel\u201d in paternity actions. Minn.Stat. \u00a7 257.69, subd. (1) (1988). A public defender was appointed at county expense and Stransky was ordered to inform the county when he became employed *462and again had income. In 1986 the court learned that Stransky had become employed and it ordered him to pay attorney fees from September 1, 1986 forward.\nStransky argues that the statute does not require complete reimbursement of attorney fees. He asserts that the trial court should have entered specific findings regarding his ability to pay and that if he is not capable of repaying in a reasonable period, such as a year, the fees should be forgiven. The statute states in relevant part that \u201cthe court shall require a party to pay part of the fees of court-appointed counsel according to the party\u2019s ability to pay * * Minn.Stat. \u00a7 257.69, subd. 2 (emphasis added). There is absolutely no authority for the proposition that if Stran-sky delays payment, he then should not be required to reimburse the county. Rather, after Stransky became employed, the court found that he was no longer entitled to court-appointed counsel. 
The trial court did not err in its application of this statute to Stransky.\nVII.\nStransky also argues that the trial court erred in failing to appoint counsel to represent him on this appeal. Stransky argues that the court applied the wrong standard in refusing him court-appointed appellate counsel. As noted above, the trial court found that Stransky was \u201cindigent within the meaning of Minn.Stat. \u00a7 257.69, subd. 1.\u201d The statute does not contain the word \u2018indigent.\u2019 The correct language, as discussed above, is \u201cunable to pay timely.\u201d However, the case upon which Stransky relies, Hepfel v. Bashaw, 279 N.W.2d 342 (Minn.1979), was decided before the present statute was enacted. Similarly, Stransky relies upon the model Uniform Parentage Act, although the Minnesota Legislature adopted a revised version in 1980.\nHepfel and the UPA are unnecessary to resolution of this problem. Minn.Stat. \u00a7 257.69 does not refer specifically to appellate counsel. This statute orders court-appointed counsel for those \u2018unable to pay timely\u2019 which was Stransky\u2019s situation when the action began. When Stransky became employed, the trial court ordered him to take responsibility for his legal fees. The trial court did not err in refusing to appoint counsel for Stransky in this appeal.\nDECISION\nThe trial court abused its discretion in refusing to permit prior crimes evidence for impeachment of Stransky\u2019s testimony. McColley is entitled to a new trial. The constitutionality of the statute in question is affirmed. Stransky is not entitled to court-appointed counsel and must reimburse the county for the expenses it has incurred on his behalf.\nReversed.\n"
31
+ }
32
+ ]
data/curated_samples/freelaw_raw.json ADDED
The diff for this file is too large to render. See raw diff
 
data/curated_samples/pg19_raw.json ADDED
The diff for this file is too large to render. See raw diff
 
data/curated_samples/philpapers_raw.json ADDED
The diff for this file is too large to render. See raw diff
 
data/curated_samples/pubmed_extract.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "pmid": 38062597,
4
+ "abstract": "\nTo investigate the effect of the haemoglobin, albumin, lymphocyte, and platelet (HALP) score (Haemoglobin, Albumin, Lymphocyte, Platelet count) on survival as a new prognostic factor in metastatic bladder cancer.\nDescriptive study. Place and Duration of the Study: Department of Medical Oncology, Celal Bayar University, Manisa, Turkey, and Adnan Menderes University, Aydin, Turkey, from 2010 to 2020.\nThe medical charts of patients with metastatic bladder cancer were reviewed retrospectively. Prognostic value of the HALP score as a marker of overall survival was examined through a receiver operating characteristic (ROC) curve analysis.\nThe cut-off value for the HALP score in the ROC curve analysis was 29. The median overall survival (OS) was 19 months when the HALP score was less than 29, and the median OS was 40 months when the HALP score was 29 or greater, and this finding was statistically significant (p = 0.003).\nThe HALP score is closely related to prognosis in metastatic bladder cancer. A high HALP score is associated with better survival outcomes.\nHALP score, Metastatic bladder cancer, Overall survival.\n",
5
+ "title": "HALP Score as a New Prognostic Factor for Patients with Metastatic Bladder Cancer."
6
+ },
7
+ {
8
+ "pmid": 38062602,
9
+ "abstract": "\nTo determine the popular subspeciality choices among ophthalmology residents, to explore the factors affecting that choice, and to identify the obstacles that they face while selecting a subspeciality fellowship.\nMixed-methods study. Place and Duration of the Study: Department of Ophthalmology of Hayatabad Medical Complex and Lady Reading Hospital, Peshawar, from 1st September to 30th November, 2021.\nThe qualitative (1st) phase comprised of focussed group discussion (FGD) followed by an online in-depth interview (IDI). Quantitative (2nd) phase was conducted as online survey and consisted of currently enrolled FCPS-ophthalmology residents. Themes were generated and used to modify the pre-designed questionnaire. Questionnaire was piloted and necessary adjustments were made. In the 2nd phase, the online link of the questionnaire was shared with the eligible participants all over the country. For the association between categorical variables, the Pearson Chi-square test was used with significance at p-value <0.05. \u00a0 Results: Nine participants were recruited in the first phase and three themes were generated. In the second phase, a total of 138 responses were received, among which mean age was 29.37 + 3.9 years, and 74 (53.6%) participants were males. The top three subspeciality choices were cataract and refractive surgery (n = 31, 22.5%), vitreoretinal surgery (n = 30, 21.7%) and orbit and oculoplasty (n = 13, 9.4%). Sixty-two (48.8%) participants stated interest in the career to be the main reason to choose a particular fellowship, and 54 (42.5%) participants identified challenging subspeciality as the main obstacle. An association between gender and the intended subspeciality showed significant results (p = 0.029).\nOphthalmology residents of Pakistan have different approaches and motivations for selecting a fellowship programme. 
Professional needs and scientific reasons are their main motivations.\nOphthalmology, Fellowship, Motivations, Career choices, Mentor, Pakistan.\n",
10
+ "title": "Subspeciality Interests Among the Ophthalmology Residents: A Mixed-Methods Study."
11
+ },
12
+ {
13
+ "pmid": 38062599,
14
+ "abstract": "\nTo assess the stone-free rate (SFR) subsequent to percutaneous nephrolithotomy (PCNL) in patients with anatomically anomalous kidneys.\nCross-sectional study. Place and Duration of the Study: Department of Urology, Sindh Institute of Urology and Transplantation Karachi, from 23 July 2020 till 30\u00a0October 2021.\nSixty-five patients of renal stone disease with abnormal kidneys, aged 18-60 years of both genders were enrolled in this cross-sectional study. Demographic information like age, gender, stone size, duration of disease, and type of abnormality were noted. After PCNL, stone-free status was determined after 2 weeks of the procedure by performing ultrasound KUB.\nA total of 65 patients were included in this study with mean age of 36.37 \u00b1 12.86 years [Range: 18-60]. There were 76.9% of males and 23.1% of females. Regarding anatomical malformation, 46.2% were malrotated kidneys, 16.9% were horseshoe kidneys, 16.9% were partial Duplex system, 9.2% had bifid pelvis. Median duration of the disease was 12 (IQR=10). Forty-one patients (63.1%) had single and 24 (36.9%) had multiple number of stones with average size of 3.26 \u00b1 1.14 cm. SFR after PCNL in patients with abnormal kidneys was 70.77% (46\/65) while 29.23% (19\/65) were observed with residual fragments. Out of 19 cases with residual fragments, 10 (53.2%) had stone size <1 cm and 9 (47.4%) had stone of 1 to 3 cm. Sixteen out of 19 patients with residual stones were treated with ESWL (most required: one session), and re-do PCNL was performed in three cases.\nPCNL is an effective and safe operation in anatomically anomalous kidneys. For satisfactory outcomes, it requires extreme care and exceptional surgical skill.\nMalrotated kidney, Duplex system, Kidney anomaly, Horseshoe kidney, PCNL, Extracorporeal shockwave lithotripsy (ESWL).\n",
15
+ "title": "Percutaneous Nephrolithotomy in Anomalous Kidney."
16
+ },
17
+ {
18
+ "pmid": 38062601,
19
+ "abstract": "\nThis review evaluated the risks and survival benefits of pancreatoduodenectomy associated with venous resection compared with palliative surgery. A systematic review with meta-analysis was performed. Higher overall survival was observed in the pancreatic resection group (HR = 4.000; 95% CI 2.800 to 5.200). However, the palliative group had fewer complications (RD = -0.170; 95% CI -0.260 to -0.070). There was no significant difference in the mortality rates (RD = 0.000; 95% CI -0.030 to 0.030). In centres with experience in pancreatic surgery, resection may be considered for locally advanced cancer and major venous invasion. Pancreaticoduodenectomy with vascular resection may improve survival for periampullary tumours compared with palliation therapy. However, pancreaticoduodenectomy with major venous resection has potentially higher morbidity than palliation therapy. Key Words: Pancreatoduodenectomy, Pancreatic neoplasms, Vascular surgical procedures.\n",
20
+ "title": "Pancreatoduodenectomy with Venous Resection or Palliative Therapy? A Meta-Analysis."
21
+ },
22
+ {
23
+ "pmid": 38062600,
24
+ "abstract": "\nThe primary aim of this review was to determine the effects of CIMT (constraint-induced movement therapy) on gait, balance, and motor functions of the lower extremity in stroke. The secondary aim was to determine the optimal dosage, application time, and duration of CIMT in the lower extremity in stroke. PubMed (1999-July 2021), Pedro (2000-December 2020), Google Scholar (1999-Febraury 2022), and Cochrane Library (2000-Febraury 2022) were searched in February 2022. The risk of bias was calculated through the criteria outlined in the (Cochrane-Handbook for Systematic-Reviews of Interventions). Eight RCTs were included in this review. CIMT was found to be effective in improving balance, gait, and motor functions of lower limbs; however, its superiority in comparison to the control group was not significant, no specific dosage was mentioned for lower limb CIMT as different studies used different durations and intensities of CIMT. Key Words: Cerebrovascular accident (CVA), Balance, Lower-extremity constraint-induced movement therapy (CIMT), Motor functions.\n",
25
+ "title": "Effects of Constraint-Induced Movement Therapy and Application Time and Duration of Intervention for Lower Extremity in Stroke: A Systematic Review."
26
+ },
27
+ {
28
+ "pmid": 38062605,
29
+ "abstract": "\nTo find the effectiveness of distal sodium channel blocks in managing lumbosacral radicular syndrome.\nOpen-labelled, non-randomised, single-group, prospective, pilot study. Place and Duration of the Study: Pain Clinic of Armed Forces Institute of Rehabilitation Medicine (AFIRM) Rawalpindi, Pakistan, from January to June 2022.\nPatients having low back pain radiating to L5\/S1\/both dermatomes with severity of numerical rating scale (NRS) score of more than 4\/10 were included. Straight leg raise (SLR) and NRS score were noted down at baseline and at 30 minutes, 24 hours, 1 week, and 4 weeks post-distal sodium channel block (DSCB). DSCB was performed at beta 1, 2, 3, and 5 portals using 2 ml of 2% injection plain lignocaine + 1 ml (40 mg) injection triamcinolone + 7 ml distilled water. Statistical analysis was done using Statistical Package for Social Sciences (SPSS) 21.\nOut of 50 patients, 24 (48%) were females and 26 (52%) were males. No serious procedural complications were noted. Post-DSCB, follow-up was done for 4 weeks. A significant fall in NRS and an increase in SLR score were observed at every visit. Results were statistically significant (p<0.001) when mean NRS and SLR scores at every follow-up were compared for pre- and post-DSCBs.\nDSCB reduced pain and improved SLR in patients even at 4 weeks of follow-up. Advantages included immediate pain relief, easy to perform as outdoor procedure, cost-effective and a time buying alternative procedure allowing for the analgesic effect of medicine to kick in.\nRadiculopathy, Low-back pain, Epidural spinal injection, Sciatica, Pain management, Distal sodium channel blocks.\n",
30
+ "title": "Effectiveness of Distal Sodium Channel Block in Managing Lumbosacral Radicular Syndrome: A Pilot Study."
31
+ },
32
+ {
33
+ "pmid": 38062606,
34
+ "abstract": "\nRampant and prevalent deployment of an efficient malaria vaccine in Pakistan, together with basic control and preventive measures, could significantly decrease the economic and healthcare burden caused by drug-resistant malaria. Moreover, RTS, S\/AS01 vaccine has attained a much-needed breakthrough after decades of growth, as an innovative vaccine for malaria in Phase III clinical trials, and presently undergoing implementation studies. So far Gavi, WHO, and other stakeholders are contemplating on the practical issues, risk-benefit, and cost-effectiveness in resource-limited settings of vaccine implementation capacity. Imminent advances, like using a delayed as well as enhanced protection, divided schedule for dosing, and alternate adjuvants are likely to attain the vital goal of eradication of malaria. Vaccination is a potentially critical component of efforts to arrest the development and dissemination of antimicrobial resistance; though little is known about the impact vaccination may have within low-and-middle-income countries. Key Words: Antimicrobial resistance, Malaria, Vaccine.\n",
35
+ "title": "Commodifying Vaccines to Curtail Antibiotic Resistance Impact in Malaria Endemic Countries."
36
+ },
37
+ {
38
+ "pmid": 38062604,
39
+ "abstract": "\nTo compare the efficacy of conchal cartilage graft and temporalis fascia graft in Type 1 tympanoplasty in terms of graft uptake and hearing improvement.\nDescriptive study. Place and Duration of the Study: Department of Otolaryngology, Khyber Teaching Hospital, Peshawar, Pakistan, from January 2020 till December 2022.\nUsing quota sampling, 2 groups were made i.e. conchal cartilage group (Group A) and temporalis fascia group (Group B). Total of 124 records were selected with 62 records from each group. Graft uptake rate and audiological outcomes were compared between the groups. Moreover, postoperative complication rate was also noted for each group.\nThe graft uptake rates between Group A and B at 3rd month were compared postoperatively (98.39%, 93.55%, p=0.36). The difference between preoperative mean air-bone gap (ABG, 28.05 \u00b1 2.19dB, 28.68 \u00b1 2.38 dB, p=0.12) and postoperative mean ABG (13.35 \u00b1 3.45, 14.47 \u00b1 3.29, p=0.69) was also statistically not significant. However, the differences regarding audiological success rate between cartilage and fascia groups (96.77%, 82.25%, p=0.01) and average operating time (51.8 \u00b1 2.1 vs. 43.5 \u00b1 3.2 minutes, p=0.009) were significantly different.\nIn chronic otitis media (COM) patients with subtotal perforations, endoscopic tympanoplasty using conchal cartilage or temporalis fascia as graft yielded comparable outcomes in terms of graft uptake, hearing improvement, and postoperative complications. However, using conchal cartilage, the procedure showed better audiological success rate. With temporalis fascia as graft, the procedure was performed in a shorter time.\nChronic otitis media, Tympanoplasty, Temporalis fascia, Conchal cartilage, Subtotal perforation.\n",
40
+ "title": "Audiological Outcomes of Type 1 Tympanoplasty Using Conchal Cartilage and Temporalis Fascia."
41
+ },
42
+ {
43
+ "pmid": 38062603,
44
+ "abstract": "\nTo identify predictive factors associated with the occurrence of postoperative pancreatic fistula (POPF) following pancreatoduodenectomy (PD) in an increasingly geriatric population.\nObservational study. Place and Duration of the Study: Department of General Surgery, Inonu University, Malatya, Turkey, from January 2010 to April 2022.\nDemographic and clinicopathological data of 74 geriatric patients who underwent PD for periampullary tumours in the clinic at Inonu University were retrieved from the patient database. POPF was defined and categorised based on the guidelines established by the International Study Group for Pancreatic Surgery (ISGPS). The patients were stratified into two cohorts of POPF and no POPF. Univariate and multivariate analyses were conducted to compare variables between the two groups.\nThe median age of the patient population was 72 (65-92) years, and 51 (68.9%) individuals were male. Among the 74 patients, 35 (47.3%) experienced POPF. In the multivariate analysis, hypertension (HT, p=0.012), Wirsung diameter <3.5 mm (p<0.01), and pancreaticojejunostomy (PJ, p=0.022) emerged as independent risk factors for POPF.\nIn the context of geriatric patients undergoing PD, HT, intraoperative wirsung diameter <3.5 mm, and PJ were identified as independent risk factors for POPF. These findings can guide the adoption of safer techniques in preoperative and intraoperative evaluations, as well as in postoperative follow-ups of patients presenting with these risk factors.\nElderly, Whipple procedure, Anastomotic leakage, Predictive factor, Postoperative pancreatic fistula, Pancreatoduodenectomy.\n",
45
+ "title": "Predictive Factors of Postoperative Pancreatic Fistula in Geriatric Patients Undergoing Pancreatoduodenectomy for Periampullary Malignancy."
46
+ },
47
+ {
48
+ "pmid": 38062610,
49
+ "abstract": "\nNull.\n",
50
+ "title": "Aortic Root and Ascending Aorta Replacement with Tricuspid Repair."
51
+ }
52
+ ]
data/curated_samples/pubmed_raw.json ADDED
The diff for this file is too large to render. See raw diff
 
data/curated_samples/s2orc_abstract_raw.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "corpusid": 9327831,
4
+ "openaccessinfo": {
5
+ "externalids": {
6
+ "MAG": null,
7
+ "ACL": null,
8
+ "DOI": null,
9
+ "PubMedCentral": null,
10
+ "ArXiv": null
11
+ },
12
+ "license": null,
13
+ "url": null,
14
+ "status": null
15
+ },
16
+ "abstract": "Our goal was to document somatosensory effects in speech motor adaptation that are related to facial skin deformation. The study assessed the role of somatosensory information in speech learning by focusing on the deformation of the facial skin and the motion of the lip. We found that facial skin deformation applied over the course of a series of training trials affected lip movements in a progressive and adaptive manner. The results suggest that the motor plan for the target task was modified in an adaptive manner on the basis of the skin stretch perturbation. This is consistent with the idea that somatosensory afferent input associated with skin deformation may modify the plan for articulatory motion in speech motor learning."
17
+ },
18
+ {
19
+ "corpusid": 190697212,
20
+ "openaccessinfo": {
21
+ "externalids": {
22
+ "MAG": "2133526683",
23
+ "ACL": null,
24
+ "DOI": null,
25
+ "PubMedCentral": null,
26
+ "ArXiv": null
27
+ },
28
+ "license": null,
29
+ "url": null,
30
+ "status": null
31
+ },
32
+ "abstract": "In deze notitie richten we ons op voorspellers van duurzame inzetbaarheid, waarbij we gebruik maken van de resultaten van de eerste twee metingen van STREAM en van een interviewstudie bij deelnemers aan STREAM. Meer specifiek gaan we na hoe personen met gezondheidsproblemen toch productief kunnen blijven en hoe verzuim voorkomen kan worden. Daarnaast gaan we na wat bepaalt of mensen blijven participeren in arbeid dan wel uitstromen uit arbeid. De belangrijkste bevindingen van een interviewstudie naar het behoud van productiviteit voor mensen met gezondheidsproblemen zijn: Om productiviteit op het werk bij gezondheidsproblemen te behouden of verlies te beperken zijn aanpassingen nodig. Deze kunnen betrekking hebben op het werk (werktijden, werkplek, taken), maar kunnen ook van sociale (collega\u2019s nemen sommige taken over) of persoonlijke (verandering van werkstijl) aard zijn. Belemmerende factoren voor productiviteitbehoud zijn psychische klachten, negatieve life events en een lage motivatie. Bevorderende factoren zijn psychisch welzijn, optimisme en een hoge motivatie. Positieve werkkenmerken en persoonlijke welzijn moeten gestimuleerd worden voordat gezondheidsproblemen optreden omdat deze factoren van belang zijn bij het doen van aanpassingen en het vinden van een balans tussen het vermogen van een persoon en de eisen van hun werk wanneer er wel gezondheidsproblemen aanwezig zijn. De belangrijkste bevindingen van een interviewstudie naar het behoud van productiviteit voor mensen met gezondheidsproblemen zijn:\u00b7 Om productiviteit op het werk bij gezondheidsproblemen te behouden of te beperken zijn aanpassingen nodig. Deze kunnen betrekking hebben op het werk (werktijden, werkplek, taken), maar kunnen ook van sociale (collega\u2019s nemen sommige taken over) of persoonlijke (verandering van werkstijl) aard zijn.- Belemmerende factoren voor productiviteitbehoud zijn psychische klachten, negatieve life events en een lage motivatie. 
Bevorderende factoren zijn psychisch welzijn, optimisme en een hoge motivatie.\u00b7 Positieve werkkenmerken en persoonlijke welzijn moeten gestimuleerd worden voordat er gezondheidsproblemen optreden omdat deze factoren van belang zijn bij het doen van aanpassingen en het vinden van een balans wanneer er wel gezondheidsproblemen aanwezig zijn. De belangrijkste bevindingen met betrekking tot de invloed van werkkenmerken en gezondheid op ziekteverzuim zijn:\u00b7 Vergeleken met andere type gezondheidsproblemen zijn psychische klachten het sterkst gerelateerd aan ziekteverzuim.\u00b7 Een lage autonomie (=mate waarin een werknemer in staat is zijn eigen werk te reguleren) en hoge taakeisen voorspellen langer verzuim (meer dan 9 dagen in het afgelopen jaar). \u00b7 Ongunstige werkomstandigheden verhogen de kans op verzuim, vooral bij mensen met gezondheidsproblemen.\u00b7 Vooral bij mensen met gezondheidsproblemen is het van belang om goede werkomstandigheden (niet te hoge taakeisen, veel autonomie) te bevorderen. Om voorspellers van blijvende participatie in het arbeidsproces te onderzoeken is het nodig om onderscheid te maken naar verschillende typen uitstroom. Daarom is onderscheid gemaakt in uitstroom naar werkloosheid, arbeidsongeschiktheid en vroegpensioen. De belangrijkste bevindingen zijn:\u00b7 Van de werkkenmerken draagt met name een goed sociaal klimaat bij aan blijvende participatie. Individuele kenmerken zoals opleiding, mastery (=greep hebben op je leven) en life events spelen een beperkte rol in blijvende participatie. Wel is een lage leerorientatie (minder gericht op leren in de werksituatie) voorspellend voor vroegpensioen.\u00b7 Een slechte fysieke en mentale gezondheid zijn belangrijke voorspellers voor de uitstroom naar werkloosheid en arbeidsongeschiktheid. 
Voor de uitstroom naar vroegpensioen geldt dit alleen voor fysieke gezondheid en niet voor mentale gezondheid.\u00b7 Factoren die de uitstroom naar vroegpensioen bevorderen zijn: een minder goed sociaal klimaat op het werk (gebrek aan waardering), een positieve attitude van de partner ten aanzien van stoppen met werken en de financiele mogelijkheid om met werken te stoppen."
33
+ },
34
+ {
35
+ "corpusid": 190295343,
36
+ "openaccessinfo": {
37
+ "externalids": {
38
+ "MAG": "88258626",
39
+ "ACL": null,
40
+ "DOI": null,
41
+ "PubMedCentral": null,
42
+ "ArXiv": null
43
+ },
44
+ "license": null,
45
+ "url": null,
46
+ "status": null
47
+ },
48
+ "abstract": "Sulawesi Tengah adalah salah satu dari 27 propinsi yang ada di lndonesia dengan Palu sebagai ibukotanya. Memiliki banyak tempat obyek wisata yang menarik, salah satunya pantai Tanjung Karang terletak di desa Labuan boyo. Pada saat ini pantai Tanjung Karang masih memiliki sarana dan prasarana yang masih kurang sehingga perlu diadakan pembenahan di bidang sarana dan prasarana agar wisatawan betah dan mau memperpanjang liburannya. Dari segi sarana yang perlu ditambahkan adalah restoran, money changer, Taman bermain anak, toko cinderamata, camping ground, dan pos keamanan. Sedangkan dari segi prasarana adalah perbaikan jalan, pemasangan jaringan telepon, pemasangan jaringan listrik dan pemasangan pipa air minum. Sebaiknya diadakan tari-tarian dilobi hotel agar suasana lebih meriah selain perlu diadakannya pengembangan juga perlu dilakukan usaha promosi yang selama ini masih kurang dilakukan oleh pemerintah daerah. Agar semua pengembang dan promosi dapat berjalan dengan lancar diperlukan dana yang berasal dari pemerintah daerah dan pengelola cottage."
49
+ },
50
+ {
51
+ "corpusid": 222133355,
52
+ "openaccessinfo": {
53
+ "externalids": {
54
+ "MAG": "3089555664",
55
+ "ACL": null,
56
+ "DOI": null,
57
+ "PubMedCentral": null,
58
+ "ArXiv": "2010.01530"
59
+ },
60
+ "license": null,
61
+ "url": null,
62
+ "status": null
63
+ },
64
+ "abstract": "Given a quasi-transitive infinite graph $G$ with volume growth rate ${\\rm gr}(G),$ a transient biased electric network $(G,\\, c_1)$ with bias $\\lambda_1\\in (0,\\,{\\rm gr}(G))$ and a recurrent biased one $(G,\\, c_2)$ with bias $\\lambda_2\\in ({\\rm gr}(G),\\infty).$ Write $G(p)$ for the Bernoulli-$p$ bond percolation on $G$ defined by the grand coupling. Let $(G,\\, c_1,\\, c_2,\\, p)$ be the following biased disordered random network: Open edges $e$ in $G(p)$ take the conductance $c_1(e)$, and closed edges $g$ in $G(p)$ take the conductance $c_2(g)$. Our main results are as follows: (i) On connected quasi-transitive infinite graph $G$ with percolation threshold $p_c\\in (0,\\, 1),$ $(G,\\, c_1,\\, c_2,\\, p)$ has a non-trivial recurrence\/transience phase transition such that the threshold $p_{c}^{*}\\in (0,\\, 1)$ is deterministic, and almost surely $(G,\\, c_1,\\, c_2,\\, p)$ is recurrent for $p p_c^*.$ There is a non-trivial recurrence\/transience phase transition for $(G,\\, c_1,\\, c_2,\\, p)$ with $G$ being a Cayley graph if and only if the corresponding group is not virtually $\\mathbb{Z}$. (ii) On $\\mathbb{Z}^d$ for any $d\\geq 1,$ $p_c^{*}= p_c$. And on $d$-regular trees $\\mathbb{T}^d$ with $d\\geq 3$, $p_c^{*}=(\\lambda_1\\vee 1) p_c$, and thus $p_c^{*}>p_c$ for any $\\lambda_1\\in (1,\\,{\\rm gr}(\\mathbb{T}^d)).$ As a contrast, we also consider phase transition of having unique currents or not for $(\\mathbb{Z}^d,\\, c_1,\\, c_2,\\, p)$ with $d\\geq 2$ and prove that almost surely $(\\mathbb{Z}^2,\\, c_1,\\, c_2,\\, p)$ with $\\lambda_1<1\\leq\\lambda_2$ has unique currents for any $p\\in [0,1]$."
65
+ },
66
+ {
67
+ "corpusid": 192557628,
68
+ "openaccessinfo": {
69
+ "externalids": {
70
+ "MAG": "2339095755",
71
+ "ACL": null,
72
+ "DOI": null,
73
+ "PubMedCentral": null,
74
+ "ArXiv": null
75
+ },
76
+ "license": null,
77
+ "url": null,
78
+ "status": null
79
+ },
80
+ "abstract": "Introduction : Les medecins qui soignent des personnes atteintes de demence moderee ou grave s'interrogent souvent sur la pertinence de leur prescrire des medicaments a visee preventive. Ces medicaments peuvent diminuer le risque de complication a long terme mais n'apportent aucun benefice a court terme. De plus, l'administration de ces medicaments peut causer des inconforts au patient en raison des troubles cognitifs et de la dysphagie, frequente a ce stade de la maladie. La valeur de ces traitements peut aussi etre remise en question car les personnes dementes qui sont souvent peu mobiles et ont une esperance de vie ecourtee sont exclues des essais cliniques ayant demontre l'utilite de ces molecules en geriatrie. Les lignes directrices publiees sont peu utiles parce qu'elles ne se prononcent pas sur la conduite a tenir dans ce contexte clinique. Finalement, au plan ethique, les volontes du patient en cas d'inaptitude mentale indiquent le plus souvent le desir de ne recevoir que des soins de confort a ce stade ce qui peut etre interprete comme une demande de cesser ces medicaments. Objectifs et Methodes : En avril 2004, 18 medecins generalistes, 5 geriatres, 1 interniste (aussi pharmaco-epidemiologue), 1 geronto-psychiatre, 5 pharmaciens, 1 infirmiere et 4 personnes impliquees en ethique clinique se sont reunis a la demande de l'Institut Universitaire de Geriatrie de Sherbrooke afin de discuter des propositions d'un comite ayant recense et resume les ecrits sur ce sujet. A la suite de cette rencontre, un document synthese a ete prepare et envoye aux participants pour obtenir leur approbation. Bien que chaque participant ait pu commenter le document et se prononcer sur les recommandations generales, on a demande seulement aux medecins et pharmaciens d'approuver les recommandations specifiques a propos des medicaments. Tous ont accepte a l'exception d'une pharmacienne qui n'a pu etre rejointe. 
Recommandations: Les quatre recommandations generales sont les suivantes: \"Pour une personne atteinte de demence moderee ou grave, le medecin devrait: - Prescrire des medicaments a visee preventive seulement aux personnes ayant une esperance de vie suffisante pour en beneficier - Eviter de prescrire des medicaments autres que ceux qui visent le confort lorsque l'esperance de vie est courte ou que la qualite de vie apparait pauvre a ceux qui ont a coeur le meilleur interet du patient - S'abstenir de prescrire des medicaments a visee preventive aux personnes pour qui la prise de medicaments comporte trop d'inconvenients - Lorsqu'il y a doute, prescrire un medicament potentiellement utile seulement pour une periode determinee puis reevaluer la pertinence de continuer apres cette periode\". Des recommandations specifiques a propos de l'utilisation des statines, des bisphosphonates et du clopidogrel dans ce contexte sont aussi presentees. Finalement, un guide de raisonnement clinique est propose dans le but d'aider le clinicien a determiner la pertinence de tout autre traitement dans des contextes semblables. C'est la premiere fois que des lignes directrices sur ce sujet sont publiees."
81
+ },
82
+ {
83
+ "corpusid": 131096972,
84
+ "openaccessinfo": {
85
+ "externalids": {
86
+ "MAG": "2269721527",
87
+ "ACL": null,
88
+ "DOI": null,
89
+ "PubMedCentral": null,
90
+ "ArXiv": null
91
+ },
92
+ "license": null,
93
+ "url": null,
94
+ "status": null
95
+ },
96
+ "abstract": "Growing legume fallow crops has proven to be an important factor in reducing the yield decline effect in sugarcane production. Legumes can also provide a direct economic benefit to sugarcane farmers by providing a source of nitrogen. Further, in some instances, income can flow from the sale, of grain or seed. The following case study provides an insight into the changes made by Russell Young, a sugarcane farmer situated in the Rita Island area of the Burdekin district. The case study focuses on the economics of the old farming system versus a new farming system. \n \nThe old farming system is based on the conventional farming practices previously used by the Young family in 2002 compared to the 2006 farming system which involves a reduction in tillage practices and use of a Soybean rotational crop for seed production. A whole-of-farm was used to assess the impact of the new farming system on farm profitability. A whole-of-farm economic analysis looks at the impact of a change in farming practice across the whole business, rather than focusing on one single component. This case study is specific to an individual grower\u2019s situation and is not representative of all situations. When evaluating a farming system change, it is important to have a detailed plan."
97
+ },
98
+ {
99
+ "corpusid": 20531367,
100
+ "openaccessinfo": {
101
+ "externalids": {
102
+ "MAG": "1486251644",
103
+ "ACL": null,
104
+ "DOI": null,
105
+ "PubMedCentral": null,
106
+ "ArXiv": null
107
+ },
108
+ "license": null,
109
+ "url": null,
110
+ "status": null
111
+ },
112
+ "abstract": "This paper investigates the potential for applying electricity saving projects as a method of reducing the environmental pollution of mines. The study will identify a systematic approach to minimising the mining industry's impact on water and air pollution. A simulation is done to quantify the environmental savings associated with projects done by Energy Services Companies (ESCOs). These savings are then compared to similar projects that have been implemented at mines. Eskom-IDM projects of HVAC International are used as case studies. These electricity and environmental pollution reduction projects are combined into a systematic approach to enhance \u201cgreen mining\u201d. This study will propose a novel reporting system that consolidates operational ratios that aid ESCOs in identify project potential in an effort to reduce water and air pollution. Significant pollution reduction potential is found and an integrated reporting system is identified. This paper concludes by proposing an online system that can aid governing bodies in monitoring polluting mines. The paper is expanded further and production ratios, which can aid in the identification of project potential, are proposed. The study is limited to water and air pollution as a result of excessive water usage practices on mines in the Witwatersrand region."
113
+ },
114
+ {
115
+ "corpusid": 169586887,
116
+ "openaccessinfo": {
117
+ "externalids": {
118
+ "MAG": "129712120",
119
+ "ACL": null,
120
+ "DOI": null,
121
+ "PubMedCentral": null,
122
+ "ArXiv": null
123
+ },
124
+ "license": null,
125
+ "url": null,
126
+ "status": null
127
+ },
128
+ "abstract": "L'A. s'interesse dans cet article a l'interdependance des concepts de continuite et de changement a travers l'histoire de l'utilisation de l'ecriture. Sur l'exemple de l'irlandais celtique et du maldive indo-aryen, il demontre que les developpements historico-linguistiques sont responsables des divergences entre l'inventaire phonematique d'une langue et son utilisation pour l'inventaire graphemique correspondant. L'A. utilise les concepts de continuite et de changement pour caracteriser ces divergences et montre que le changement consonantique mene en principe a l'amoindrissement de l'equivalence et de la coherence. L'analyse sur des inscriptions maldives montre que l'utilisation de l'ecriture est issue d'une longue tradition historique."
129
+ },
130
+ {
131
+ "corpusid": 113344833,
132
+ "openaccessinfo": {
133
+ "externalids": {
134
+ "MAG": "2347612024",
135
+ "ACL": null,
136
+ "DOI": null,
137
+ "PubMedCentral": null,
138
+ "ArXiv": null
139
+ },
140
+ "license": null,
141
+ "url": null,
142
+ "status": null
143
+ },
144
+ "abstract": "The remote monitoring of equipment was realized based on GSM short messages and programmable logic control (PLC) technology. The PLC control was combined with mobiles\u2019 short messages. This paper made a study of the hardware interface between the programmable logic controller (PLC) and the mobile phone and the working principle with which the PLC controlled the mobile phone. This paper focused that the GSM module controlled by a PLC was used to receive and send Chinese short messages, as well as the important aspect in the process of receiving and decoding Chinese short messages. The PLC embedded in the mobile phone control technology was adopted to realize the remote data acquisition of important parameters and real-time alarming function in the robotic automatic polishing system."
145
+ },
146
+ {
147
+ "corpusid": 139210843,
148
+ "openaccessinfo": {
149
+ "externalids": {
150
+ "MAG": "2757042771",
151
+ "ACL": null,
152
+ "DOI": null,
153
+ "PubMedCentral": null,
154
+ "ArXiv": null
155
+ },
156
+ "license": null,
157
+ "url": null,
158
+ "status": null
159
+ },
160
+ "abstract": "The method for the repair of a defective portion of a constant cartilaginous tissue, a method of creating a certain cartilage repair device (20) and certain cartilage repair device is disclosed. In a method for the repair of a defective portion of the constant cartilaginous tissue, for example, certain device comprising a constant scaffold such certain extracellular matrix material is implanted into the defect site , certain biological lubricant (234) is administered to the defect site. Further, the device (20) has a constant scaffold (100), and certain biological lubricant such as extracellular matrix material generating a constant natural (234)."
161
+ }
162
+ ]
data/curated_samples/s2orc_raw.json ADDED
The diff for this file is too large to render. See raw diff
 
data/curated_samples/stackexchange_extract.json ADDED
The diff for this file is too large to render. See raw diff
 
data/curated_samples/stackexchange_raw.json ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "Id": 1,
4
+ "PostTypeId": 1,
5
+ "AcceptedAnswerId": 27.0,
6
+ "CreationDate": "2014-09-16T18:19:05.860",
7
+ "Score": 36,
8
+ "ViewCount": 2367.0,
9
+ "Body": "<p>Assume we have an Earth-like environment. <\/p>\n\n<p>If a tribe with only early medieval technology were to settle in a mountain environment: <\/p>\n\n<ul>\n<li><p>What resources would be needed to deal with the elevation, cold, and other hazards?<\/p><\/li>\n<li><p>What resources are they likely to have available?<\/p><\/li>\n<\/ul>\n",
10
+ "OwnerUserId": 10.0,
11
+ "LastEditorUserId": 68902.0,
12
+ "LastEditDate": "2019-10-10T20:53:25.563",
13
+ "LastActivityDate": "2019-10-10T20:53:25.563",
14
+ "Title": "How to Survive Mountain Environments With a Tribal Culture?",
15
+ "Tags": "|environment|earth-like|climate|mountains|altitude|",
16
+ "AnswerCount": 4.0,
17
+ "CommentCount": 2,
18
+ "ContentLicense": "CC BY-SA 4.0",
19
+ "ParentId": null,
20
+ "ClosedDate": null,
21
+ "FavoriteCount": null,
22
+ "LastEditorDisplayName": null,
23
+ "OwnerDisplayName": null,
24
+ "CommunityOwnedDate": null
25
+ },
26
+ {
27
+ "Id": 2,
28
+ "PostTypeId": 1,
29
+ "AcceptedAnswerId": 48.0,
30
+ "CreationDate": "2014-09-16T18:21:10.853",
31
+ "Score": 139,
32
+ "ViewCount": 9055.0,
33
+ "Body": "<p>Creating an entire world is a daunting prospect, though something I've thought about often. <\/p>\n\n<p>Ideas about different regions, religions, people, history and relationships all blending into a cohesive world come half-formed to me. But how can I go about turning these into a believable world with a foundation for adventure? What steps have those of you who have built entire worlds taken in the past to break this task down into manageable chunks?<\/p>\n",
34
+ "OwnerUserId": 15.0,
35
+ "LastEditorUserId": 15.0,
36
+ "LastEditDate": "2019-10-14T01:07:15.283",
37
+ "LastActivityDate": "2019-10-14T01:07:15.283",
38
+ "Title": "How can I break down the task of creating a world into manageable chunks?",
39
+ "Tags": "|worldbuilding-process|",
40
+ "AnswerCount": 11.0,
41
+ "CommentCount": 1,
42
+ "ContentLicense": "CC BY-SA 3.0",
43
+ "ParentId": null,
44
+ "ClosedDate": null,
45
+ "FavoriteCount": null,
46
+ "LastEditorDisplayName": null,
47
+ "OwnerDisplayName": null,
48
+ "CommunityOwnedDate": null
49
+ },
50
+ {
51
+ "Id": 3,
52
+ "PostTypeId": 1,
53
+ "AcceptedAnswerId": 6.0,
54
+ "CreationDate": "2014-09-16T18:28:12.400",
55
+ "Score": 156,
56
+ "ViewCount": 113671.0,
57
+ "Body": "<p>How many humans are needed in order to sustain and grow a steady population in a habitable area? Assume resources are adequate and the environment is favorable.<\/p>\n\n<p>To further refine this question, let's assume this is an early spacefaring technology scenario, such as putting a colony on a neighboring planet (same system) with earth-like conditions with a technology level either current or near-future. Assume the population has a mix of professions suitable for the new environment (resource gatherers, physicians, etc.).<\/p>\n",
58
+ "OwnerUserId": 10.0,
59
+ "LastEditorUserId": 2113.0,
60
+ "LastEditDate": "2019-08-11T02:09:33.310",
61
+ "LastActivityDate": "2022-10-01T06:26:39.580",
62
+ "Title": "What is the minimum human population necessary for a sustainable colony?",
63
+ "Tags": "|society|colonies|humans|",
64
+ "AnswerCount": 21.0,
65
+ "CommentCount": 16,
66
+ "ContentLicense": "CC BY-SA 4.0",
67
+ "ParentId": null,
68
+ "ClosedDate": null,
69
+ "FavoriteCount": null,
70
+ "LastEditorDisplayName": null,
71
+ "OwnerDisplayName": null,
72
+ "CommunityOwnedDate": null
73
+ },
74
+ {
75
+ "Id": 4,
76
+ "PostTypeId": 2,
77
+ "AcceptedAnswerId": null,
78
+ "CreationDate": "2014-09-16T18:29:58.093",
79
+ "Score": 12,
80
+ "ViewCount": null,
81
+ "Body": "<p>The human body would adapt and deal with elevation. Many top athletes often train at high altitudes to help train their bodies to absorb oxygen more efficiently from the thinner air.<\/p>\n\n<p>If the terrain was pure mountains then the ability to adapt to the local geography would be key. Natural shelters in the forms of caves for early survival, though a medieval culture would be able to extend and possibly even create man-made caves for habitation over time. This will help them deal with the cold and natural predators as our, and their ancestors did. Fires at the entrance to the cave would ward off predators and help keep the cold at bay.<\/p>\n\n<p>The most significant hazards would be natural. Snow and ice would create dangerous living and working conditions. The constant need for food would also be a source of difficulty, with meat being a large part of the diet. For the survival of a large settlement, farming is necessary, though in mountains this will be near impossible limiting settlements to a small size.<\/p>\n\n<p>Cave ins, earth quakes, avalanches, all sorts of natural disasters would affect their daily lives. We struggle to deal with these in this day and age. They would simply be a fact of life for those in the medieval age that they had to contend with and get through. Any one serious act could end the settlement though.<\/p>\n",
82
+ "OwnerUserId": 15.0,
83
+ "LastEditorUserId": null,
84
+ "LastEditDate": null,
85
+ "LastActivityDate": "2014-09-16T18:29:58.093",
86
+ "Title": null,
87
+ "Tags": null,
88
+ "AnswerCount": null,
89
+ "CommentCount": 0,
90
+ "ContentLicense": "CC BY-SA 3.0",
91
+ "ParentId": 1.0,
92
+ "ClosedDate": null,
93
+ "FavoriteCount": null,
94
+ "LastEditorDisplayName": null,
95
+ "OwnerDisplayName": null,
96
+ "CommunityOwnedDate": null
97
+ },
98
+ {
99
+ "Id": 5,
100
+ "PostTypeId": 1,
101
+ "AcceptedAnswerId": 14.0,
102
+ "CreationDate": "2014-09-16T18:34:55.810",
103
+ "Score": 7,
104
+ "ViewCount": 264.0,
105
+ "Body": "<p>What one concept\/ideology is most important in a group of people to encourage them to stop acting as individuals and begin acting in the interest of the whole settlement?<\/p>\n",
106
+ "OwnerUserId": 10.0,
107
+ "LastEditorUserId": 147.0,
108
+ "LastEditDate": "2015-08-05T20:50:52.537",
109
+ "LastActivityDate": "2015-08-05T20:50:52.537",
110
+ "Title": "Creating order in a settlement",
111
+ "Tags": "|law|",
112
+ "AnswerCount": 5.0,
113
+ "CommentCount": 0,
114
+ "ContentLicense": "CC BY-SA 3.0",
115
+ "ParentId": null,
116
+ "ClosedDate": "2014-09-17T18:49:44.973",
117
+ "FavoriteCount": null,
118
+ "LastEditorDisplayName": null,
119
+ "OwnerDisplayName": null,
120
+ "CommunityOwnedDate": null
121
+ },
122
+ {
123
+ "Id": 6,
124
+ "PostTypeId": 2,
125
+ "AcceptedAnswerId": null,
126
+ "CreationDate": "2014-09-16T18:40:35.627",
127
+ "Score": 82,
128
+ "ViewCount": null,
129
+ "Body": "<p>This is quite dependent on technological\/medical knowledge.<\/p>\n<p>For example, it has been <a href=\"http:\/\/www.newscientist.com\/article\/dn1936-magic-number-for-space-pioneers-calculated.html#.VBiC_XtDLwo\" rel=\"nofollow noreferrer\">calculated at 160 for space exploration<\/a>, provided the explorers return home after 20 generations. This of course assumes pretty good medical availability and actually fairly low risks to individuals. The resultant reduction in genetic variability has been analyzed as not being very detrimental. Certainly it <em>could<\/em> but not guaranteed to and even small infusions of genetic material would majorly reduce negatives. Also starting out with high genetic variability will help reduce those risks.<\/p>\n<p>A good example of a relatively genetically non-variable population is Ashkenazi jews; a <a href=\"http:\/\/www.iflscience.com\/health-and-medicine\/dna-traces-jewish-history\" rel=\"nofollow noreferrer\">recent study<\/a> has suggested that in semi-recent history (25-30 generations) their total population was around 350 individuals; yes there are some genetic defects that are common but there is no generalized unhealthiness (or genetic unthriftiness), and some genetic defects are rare because they weren't very present in that initial population group.<\/p>\n<p>I would say that 160 is a fairly good minimum for a colony; yes- less than that may survive but any minor disaster has an unpleasantly high chance of wiping out the colony.<\/p>\n<p>The lower the technology\/medical treatment availability the higher the population would need to be to provide a 'buffer' for injuries.<\/p>\n",
130
+ "OwnerUserId": 16.0,
131
+ "LastEditorUserId": -1.0,
132
+ "LastEditDate": "2022-10-01T04:41:16.683",
133
+ "LastActivityDate": "2022-10-01T04:41:16.683",
134
+ "Title": null,
135
+ "Tags": null,
136
+ "AnswerCount": null,
137
+ "CommentCount": 4,
138
+ "ContentLicense": "CC BY-SA 4.0",
139
+ "ParentId": 3.0,
140
+ "ClosedDate": null,
141
+ "FavoriteCount": null,
142
+ "LastEditorDisplayName": null,
143
+ "OwnerDisplayName": null,
144
+ "CommunityOwnedDate": null
145
+ },
146
+ {
147
+ "Id": 7,
148
+ "PostTypeId": 2,
149
+ "AcceptedAnswerId": null,
150
+ "CreationDate": "2014-09-16T18:42:56.377",
151
+ "Score": 21,
152
+ "ViewCount": null,
153
+ "Body": "<p>This is known as the Minimum Viable Population, and many computer models and studies based on various circumstances and species have been run.<\/p>\n\n<p>For Humans, including the desire to ward of genetic defects due to inbreeding the median MVP reported is 4,169 individuals. You can read up more on this on the <a href=\"http:\/\/en.wikipedia.org\/wiki\/Minimum_viable_population\">wiki article here<\/a>.<\/p>\n",
154
+ "OwnerUserId": 15.0,
155
+ "LastEditorUserId": null,
156
+ "LastEditDate": null,
157
+ "LastActivityDate": "2014-09-16T18:42:56.377",
158
+ "Title": null,
159
+ "Tags": null,
160
+ "AnswerCount": null,
161
+ "CommentCount": 5,
162
+ "ContentLicense": "CC BY-SA 3.0",
163
+ "ParentId": 3.0,
164
+ "ClosedDate": null,
165
+ "FavoriteCount": null,
166
+ "LastEditorDisplayName": null,
167
+ "OwnerDisplayName": null,
168
+ "CommunityOwnedDate": null
169
+ },
170
+ {
171
+ "Id": 8,
172
+ "PostTypeId": 1,
173
+ "AcceptedAnswerId": 36.0,
174
+ "CreationDate": "2014-09-16T18:43:54.767",
175
+ "Score": 83,
176
+ "ViewCount": 9491.0,
177
+ "Body": "<p>Most settings with a magical component, whether this means wizards, magical races, or something similar, seem to be parked at a medieval level of development in terms of technology and society. Is this just because that's how the \"high fantasy\" genre developed, or is there some fundamental reason that having magic in a modern\/future\/high-tech world would be a bad idea?<\/p>\n",
178
+ "OwnerUserId": 28.0,
179
+ "LastEditorUserId": null,
180
+ "LastEditDate": null,
181
+ "LastActivityDate": "2022-01-16T11:56:21.520",
182
+ "Title": "Must magic be tied to medieval tech?",
183
+ "Tags": "|technology|magic|",
184
+ "AnswerCount": 17.0,
185
+ "CommentCount": 21,
186
+ "ContentLicense": "CC BY-SA 3.0",
187
+ "ParentId": null,
188
+ "ClosedDate": null,
189
+ "FavoriteCount": null,
190
+ "LastEditorDisplayName": null,
191
+ "OwnerDisplayName": null,
192
+ "CommunityOwnedDate": null
193
+ },
194
+ {
195
+ "Id": 10,
196
+ "PostTypeId": 1,
197
+ "AcceptedAnswerId": 32.0,
198
+ "CreationDate": "2014-09-16T18:50:12.500",
199
+ "Score": 27,
200
+ "ViewCount": 1784.0,
201
+ "Body": "<p>If magic were to manifest in the modern (present) age, how would first world governments attempt to classify and regulate its usage? To be more specific (and an example), if the USA created a Department of Magical Affairs, what would the primary and secondary concerns of such an agency be?<\/p>\n\n<p>Further clarification, let's assume that magic is a field of study that anyone can learn with proper research, time, and dedication, but innate talent will cause variations in the extent\/limits of power.<\/p>\n",
202
+ "OwnerUserId": 10.0,
203
+ "LastEditorUserId": 885.0,
204
+ "LastEditDate": "2017-03-17T08:07:42.943",
205
+ "LastActivityDate": "2017-03-17T08:07:42.943",
206
+ "Title": "Regulating the usage of magic in the modern era",
207
+ "Tags": "|magic|ethics|law|modern-age|",
208
+ "AnswerCount": 5.0,
209
+ "CommentCount": 2,
210
+ "ContentLicense": "CC BY-SA 3.0",
211
+ "ParentId": null,
212
+ "ClosedDate": null,
213
+ "FavoriteCount": 0.0,
214
+ "LastEditorDisplayName": null,
215
+ "OwnerDisplayName": null,
216
+ "CommunityOwnedDate": null
217
+ },
218
+ {
219
+ "Id": 11,
220
+ "PostTypeId": 2,
221
+ "AcceptedAnswerId": null,
222
+ "CreationDate": "2014-09-16T18:51:09.217",
223
+ "Score": 3,
224
+ "ViewCount": null,
225
+ "Body": "<p>Not at all, D&amp;D 3.5 released Eberron, which has a much higher level of technology, often powered by magic. It's often compared to the late 19th, early 20th century in terms of the level of technology involved.<\/p>\n\n<p>There's also the Rifts universe that has high tech, with lasers and mechs, but also incorporates magic and psionics. They specifically have a tech called Techno-Wizardry, which mixes the two together.<\/p>\n\n<p>These are just the two universes I could think of off the top of my head. Whilst it's true that a large number of high-fantasy games use magic, it's certainly not exclusively that way.<\/p>\n",
226
+ "OwnerUserId": 15.0,
227
+ "LastEditorUserId": null,
228
+ "LastEditDate": null,
229
+ "LastActivityDate": "2014-09-16T18:51:09.217",
230
+ "Title": null,
231
+ "Tags": null,
232
+ "AnswerCount": null,
233
+ "CommentCount": 0,
234
+ "ContentLicense": "CC BY-SA 3.0",
235
+ "ParentId": 8.0,
236
+ "ClosedDate": null,
237
+ "FavoriteCount": null,
238
+ "LastEditorDisplayName": null,
239
+ "OwnerDisplayName": null,
240
+ "CommunityOwnedDate": null
241
+ }
242
+ ]
data/curated_samples/wiki.json ADDED
The diff for this file is too large to render. See raw diff
 
data/dataset_details.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:433690b6f6c419fcfbc0733737c65758d827da37c4298ecdf603020f88fe2bd7
3
+ size 353
data/dataset_inclusion.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:600912f6c6b4d575d714269d0d10527a223726c60c718ae3ac1c3a60933d09fa
3
+ size 426
data/dataset_inclusion_size.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b04f6d2103fee6296c88fc3ba591cb9c97d4982deabcc1cc5e5d6af1146b5ac4
3
+ size 571
data/line_info.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fdfa7094ba058aa99e6fe30438b5a1840dc4aaff5c0c58ded22f8996e8d1013
3
+ size 473956
data/lorem_ipsum.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d42ef03e41071fe18f57b935ea471000187e2c941f3b8bc80829ac7aa698e7e
3
+ size 64276
data/mbzuai-llm-us-east-1 - S3 bucket _ S3 _ us-east-1.mhtml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:768834f236e56f2a569e0792a1a1f23a31a55ca4689cc88c42f73a9f2951dcec
3
+ size 6116532
data/meta_non_web.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbfc1c5e0bf0c0f4e55cd4820965ce82e2107fb1fcdd56c8ed88ee5a97ac4a9d
3
+ size 2891
data/non_web_urls.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7a025ace5cc33dac83ffbd18463916c7ace2e54ac980cae9d984170df494e37
3
+ size 853
data/repeat_line_frac.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe89bff235e1a29bf456070c93a370a32db76fa5cefd178d0abd105759d21c91
3
+ size 555447
data/sample.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:546ef36d72c0909ea9db817ee3dbe1a7f6a36852e2c7b6ca01cc3a7021c1b01c
3
+ size 2267
data/sample_bad_urls.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8809f6b589a2fc02065752067f85ca40322d8b39a59b981d7aca084107c363a1
3
+ size 3237
data/sample_doc_stat.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07265a2951bc1b002de545de79d9fc0ce278998aa25e676b0510035683f4e6d2
3
+ size 327561
data/sample_dup_ngram.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:832e72c91e03588ab9d1d2803bafc1d2e456c99e1307c85f81016155b0ea32f7
3
+ size 740783
data/sample_en_low.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cab3a6c96bf092cd3b66f873c13ab9a4ca76a3af9f710da720aa77e5abf63fd
3
+ size 483369
data/sample_java.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a84dd3e4183645de4b63717738bde243d2b6417eb80ab3c392a16155633b4c4
3
+ size 168124
data/sample_non_en.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:333cf94d86f22d1fc86b1cc109944c19e5a95cc9a3d94e59293d100b66035c4d
3
+ size 518993
data/sample_refinedweb_line.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c21d8069933f67b8c376e526bf3a7eef710f4c87c027763a926a4a214e473d2
3
+ size 425979
data/sample_terminal_punc.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1ead905bd8336a9ab1c0da9cfbb7b532ba4963e8d136f365389c712492192bd
3
+ size 678610
data/sample_top_ngram.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d7caf9cc97ea20ba0f86ffcf6cc9c142774a1daa3dc4b594a0dda55c11c9abe
3
+ size 113826
data/sample_url_exclusion.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70974b57d305190da7ee43cd75ad4ff2e4f22cce9250553b13bd6f11ba262d7d
3
+ size 3523
data/sample_warc.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96bf524d0ddfc56ebba4d2ae6ebdea91381e97451a1cb65f70813d5aec002c42
3
+ size 196075
data/sample_wet.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eff9259c8117d9f697d37ef1fde4a96eac7a39bab537eb8fa4fa883bf130f0d8
3
+ size 433080